diff options
Diffstat (limited to 'tools')
181 files changed, 7230 insertions, 2272 deletions
diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h index 69d09c39bbcd..cd7359e23d86 100644 --- a/tools/arch/s390/include/uapi/asm/kvm.h +++ b/tools/arch/s390/include/uapi/asm/kvm.h @@ -88,6 +88,12 @@ struct kvm_s390_io_adapter_req { /* kvm attributes for KVM_S390_VM_TOD */ #define KVM_S390_VM_TOD_LOW 0 #define KVM_S390_VM_TOD_HIGH 1 +#define KVM_S390_VM_TOD_EXT 2 + +struct kvm_s390_vm_tod_clock { + __u8 epoch_idx; + __u64 tod; +}; /* kvm attributes for KVM_S390_VM_CPU_MODEL */ /* processor related attributes are r/w */ diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 8ea315a11fe0..2519c6c801c9 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -196,6 +196,7 @@ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ +#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ @@ -287,6 +288,7 @@ #define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ #define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ #define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ +#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */ #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index 5dff775af7cd..c10c9128f54e 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -21,11 +21,13 @@ # define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31)) # define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31)) # define DISABLE_CENTAUR_MCR (1<<(X86_FEATURE_CENTAUR_MCR & 31)) +# define DISABLE_PCID 0 #else # define DISABLE_VME 0 # define DISABLE_K6_MTRR 0 # define DISABLE_CYRIX_ARR 0 # define DISABLE_CENTAUR_MCR 0 +# define DISABLE_PCID (1<<(X86_FEATURE_PCID & 31)) #endif /* CONFIG_X86_64 */ #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS @@ -49,7 +51,7 @@ #define DISABLED_MASK1 0 #define DISABLED_MASK2 0 #define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR) -#define DISABLED_MASK4 0 +#define DISABLED_MASK4 (DISABLE_PCID) #define DISABLED_MASK5 0 #define DISABLED_MASK6 0 #define DISABLED_MASK7 0 diff --git a/tools/hv/bondvf.sh b/tools/hv/bondvf.sh deleted file mode 100755 index 89b25068cd98..000000000000 --- a/tools/hv/bondvf.sh +++ /dev/null @@ -1,232 +0,0 @@ -#!/bin/bash - -# This example script creates bonding network devices based on synthetic NIC -# (the virtual network adapter usually provided by Hyper-V) and the matching -# VF NIC (SRIOV virtual function). So the synthetic NIC and VF NIC can -# function as one network device, and fail over to the synthetic NIC if VF is -# down. -# -# Usage: -# - After configured vSwitch and vNIC with SRIOV, start Linux virtual -# machine (VM) -# - Run this scripts on the VM. It will create configuration files in -# distro specific directory. -# - Reboot the VM, so that the bonding config are enabled. -# -# The config files are DHCP by default. You may edit them if you need to change -# to Static IP or change other settings. -# - -sysdir=/sys/class/net -netvsc_cls={f8615163-df3e-46c5-913f-f2d2f965ed0e} -bondcnt=0 - -# Detect Distro -if [ -f /etc/redhat-release ]; -then - cfgdir=/etc/sysconfig/network-scripts - distro=redhat -elif grep -q 'Ubuntu' /etc/issue -then - cfgdir=/etc/network - distro=ubuntu -elif grep -q 'SUSE' /etc/issue -then - cfgdir=/etc/sysconfig/network - distro=suse -else - echo "Unsupported Distro" - exit 1 -fi - -echo Detected Distro: $distro, or compatible - -# Get a list of ethernet names -list_eth=(`cd $sysdir && ls -d */ | cut -d/ -f1 | grep -v bond`) -eth_cnt=${#list_eth[@]} - -echo List of net devices: - -# Get the MAC addresses -for (( i=0; i < $eth_cnt; i++ )) -do - list_mac[$i]=`cat $sysdir/${list_eth[$i]}/address` - echo ${list_eth[$i]}, ${list_mac[$i]} -done - -# Find NIC with matching MAC -for (( i=0; i < $eth_cnt-1; i++ )) -do - for (( j=i+1; j < $eth_cnt; j++ )) - do - if [ "${list_mac[$i]}" = "${list_mac[$j]}" ] - then - list_match[$i]=${list_eth[$j]} - break - fi - done -done - -function create_eth_cfg_redhat { - local fn=$cfgdir/ifcfg-$1 - - rm -f $fn - echo DEVICE=$1 >>$fn - echo TYPE=Ethernet >>$fn - echo BOOTPROTO=none >>$fn - echo UUID=`uuidgen` >>$fn - echo ONBOOT=yes >>$fn - echo PEERDNS=yes >>$fn - echo IPV6INIT=yes >>$fn - echo MASTER=$2 >>$fn - echo SLAVE=yes >>$fn -} - -function create_eth_cfg_pri_redhat { - create_eth_cfg_redhat $1 $2 -} - -function create_bond_cfg_redhat { - local fn=$cfgdir/ifcfg-$1 - - rm -f $fn - echo DEVICE=$1 >>$fn - echo TYPE=Bond >>$fn - echo BOOTPROTO=dhcp >>$fn - echo UUID=`uuidgen` >>$fn - echo ONBOOT=yes >>$fn - echo PEERDNS=yes >>$fn - echo IPV6INIT=yes >>$fn - echo BONDING_MASTER=yes >>$fn - echo BONDING_OPTS=\"mode=active-backup miimon=100 primary=$2\" >>$fn -} - -function del_eth_cfg_ubuntu { - local mainfn=$cfgdir/interfaces - local fnlist=( $mainfn ) - - local dirlist=(`awk '/^[ \t]*source/{print $2}' $mainfn`) - - local i - for i in "${dirlist[@]}" - do - fnlist+=(`ls $i 2>/dev/null`) - done - - local tmpfl=$(mktemp) - - local nic_start='^[ \t]*(auto|iface|mapping|allow-.*)[ \t]+'$1 - local nic_end='^[ \t]*(auto|iface|mapping|allow-.*|source)' - - local fn - for fn in "${fnlist[@]}" - do - awk "/$nic_end/{x=0} x{next} /$nic_start/{x=1;next} 1" \ - $fn >$tmpfl - - cp $tmpfl $fn - done - - rm $tmpfl -} - -function create_eth_cfg_ubuntu { - local fn=$cfgdir/interfaces - - del_eth_cfg_ubuntu $1 - echo $'\n'auto $1 >>$fn - echo iface $1 inet manual >>$fn - echo bond-master $2 >>$fn -} - -function create_eth_cfg_pri_ubuntu { - local fn=$cfgdir/interfaces - - del_eth_cfg_ubuntu $1 - echo $'\n'allow-hotplug $1 >>$fn - echo iface $1 inet manual >>$fn - echo bond-master $2 >>$fn - echo bond-primary $1 >>$fn -} - -function create_bond_cfg_ubuntu { - local fn=$cfgdir/interfaces - - del_eth_cfg_ubuntu $1 - - echo $'\n'auto $1 >>$fn - echo iface $1 inet dhcp >>$fn - echo bond-mode active-backup >>$fn - echo bond-miimon 100 >>$fn - echo bond-slaves none >>$fn -} - -function create_eth_cfg_suse { - local fn=$cfgdir/ifcfg-$1 - - rm -f $fn - echo BOOTPROTO=none >>$fn - echo STARTMODE=auto >>$fn -} - -function create_eth_cfg_pri_suse { - local fn=$cfgdir/ifcfg-$1 - - rm -f $fn - echo BOOTPROTO=none >>$fn - echo STARTMODE=hotplug >>$fn -} - -function create_bond_cfg_suse { - local fn=$cfgdir/ifcfg-$1 - - rm -f $fn - echo BOOTPROTO=dhcp >>$fn - echo STARTMODE=auto >>$fn - echo BONDING_MASTER=yes >>$fn - echo BONDING_SLAVE_0=$2 >>$fn - echo BONDING_SLAVE_1=$3 >>$fn - echo BONDING_MODULE_OPTS=\'mode=active-backup miimon=100 primary=$2\' >>$fn -} - -function create_bond { - local bondname=bond$bondcnt - local primary - local secondary - - local class_id1=`cat $sysdir/$1/device/class_id 2>/dev/null` - local class_id2=`cat $sysdir/$2/device/class_id 2>/dev/null` - - if [ "$class_id1" = "$netvsc_cls" ] - then - primary=$2 - secondary=$1 - elif [ "$class_id2" = "$netvsc_cls" ] - then - primary=$1 - secondary=$2 - else - return 0 - fi - - echo $'\nBond name:' $bondname - - echo configuring $primary - create_eth_cfg_pri_$distro $primary $bondname - - echo configuring $secondary - create_eth_cfg_$distro $secondary $bondname - - echo creating: $bondname with primary slave: $primary - create_bond_cfg_$distro $bondname $primary $secondary - - let bondcnt=bondcnt+1 -} - -for (( i=0; i < $eth_cnt-1; i++ )) -do - if [ -n "${list_match[$i]}" ] - then - create_bond ${list_eth[$i]} ${list_match[$i]} - fi -done diff --git a/tools/include/asm-generic/hugetlb_encode.h b/tools/include/asm-generic/hugetlb_encode.h new file mode 100644 index 000000000000..e4732d3c2998 --- /dev/null +++ b/tools/include/asm-generic/hugetlb_encode.h @@ -0,0 +1,34 @@ +#ifndef _ASM_GENERIC_HUGETLB_ENCODE_H_ +#define _ASM_GENERIC_HUGETLB_ENCODE_H_ + +/* + * Several system calls take a flag to request "hugetlb" huge pages. + * Without further specification, these system calls will use the + * system's default huge page size. If a system supports multiple + * huge page sizes, the desired huge page size can be specified in + * bits [26:31] of the flag arguments. The value in these 6 bits + * will encode the log2 of the huge page size. + * + * The following definitions are associated with this huge page size + * encoding in flag arguments. System call specific header files + * that use this encoding should include this file. They can then + * provide definitions based on these with their own specific prefix. + * for example: + * #define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT + */ + +#define HUGETLB_FLAG_ENCODE_SHIFT 26 +#define HUGETLB_FLAG_ENCODE_MASK 0x3f + +#define HUGETLB_FLAG_ENCODE_64KB (16 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_512KB (19 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_1MB (20 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_2MB (21 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_8MB (23 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_16MB (24 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_256MB (28 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_1GB (30 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_2GB (31 << HUGETLB_FLAG_ENCODE_SHIFT) +#define HUGETLB_FLAG_ENCODE_16GB (34 << HUGETLB_FLAG_ENCODE_SHIFT) + +#endif /* _ASM_GENERIC_HUGETLB_ENCODE_H_ */ diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h index bd39b2090ad1..3723b9f8f964 100644 --- a/tools/include/linux/compiler-gcc.h +++ b/tools/include/linux/compiler-gcc.h @@ -21,11 +21,14 @@ #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) #define noinline __attribute__((noinline)) - +#ifndef __packed #define __packed __attribute__((packed)) - +#endif +#ifndef __noreturn #define __noreturn __attribute__((noreturn)) - +#endif +#ifndef __aligned #define __aligned(x) __attribute__((aligned(x))) +#endif #define __printf(a, b) __attribute__((format(printf, a, b))) #define __scanf(a, b) __attribute__((format(scanf, a, b))) diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h index 8c27db0c5c08..203268f9231e 100644 --- a/tools/include/uapi/asm-generic/mman-common.h +++ b/tools/include/uapi/asm-generic/mman-common.h @@ -58,20 +58,12 @@ overrides the coredump filter bits */ #define MADV_DODUMP 17 /* Clear the MADV_DONTDUMP flag */ +#define MADV_WIPEONFORK 18 /* Zero memory on fork, child only */ +#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */ + /* compatibility flags */ #define MAP_FILE 0 -/* - * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size. - * This gives us 6 bits, which is enough until someone invents 128 bit address - * spaces. - * - * Assume these are all power of twos. - * When 0 use the default page size. - */ -#define MAP_HUGE_SHIFT 26 -#define MAP_HUGE_MASK 0x3f - #define PKEY_DISABLE_ACCESS 0x1 #define PKEY_DISABLE_WRITE 0x2 #define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index 101593ab10ac..97677cd6964d 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -700,6 +700,7 @@ struct drm_prime_handle { struct drm_syncobj_create { __u32 handle; +#define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0) __u32 flags; }; @@ -718,6 +719,24 @@ struct drm_syncobj_handle { __u32 pad; }; +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0) +#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1) +struct drm_syncobj_wait { + __u64 handles; + /* absolute timeout */ + __s64 timeout_nsec; + __u32 count_handles; + __u32 flags; + __u32 first_signaled; /* only valid when not waiting all */ + __u32 pad; +}; + +struct drm_syncobj_array { + __u64 handles; + __u32 count_handles; + __u32 pad; +}; + #if defined(__cplusplus) } #endif @@ -840,6 +859,9 @@ extern "C" { #define DRM_IOCTL_SYNCOBJ_DESTROY DRM_IOWR(0xC0, struct drm_syncobj_destroy) #define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct drm_syncobj_handle) #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle) +#define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct drm_syncobj_wait) +#define DRM_IOCTL_SYNCOBJ_RESET DRM_IOWR(0xC4, struct drm_syncobj_array) +#define DRM_IOCTL_SYNCOBJ_SIGNAL DRM_IOWR(0xC5, struct drm_syncobj_array) /** * Device specific ioctls should only be in their respective headers diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index 7ccbd6a2bbe0..6598fb76d2c2 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -260,6 +260,8 @@ typedef struct _drm_i915_sarea { #define DRM_I915_GEM_CONTEXT_GETPARAM 0x34 #define DRM_I915_GEM_CONTEXT_SETPARAM 0x35 #define DRM_I915_PERF_OPEN 0x36 +#define DRM_I915_PERF_ADD_CONFIG 0x37 +#define DRM_I915_PERF_REMOVE_CONFIG 0x38 #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) @@ -315,6 +317,8 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_GETPARAM, struct drm_i915_gem_context_param) #define DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_SETPARAM, struct drm_i915_gem_context_param) #define DRM_IOCTL_I915_PERF_OPEN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param) +#define DRM_IOCTL_I915_PERF_ADD_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config) +#define DRM_IOCTL_I915_PERF_REMOVE_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64) /* Allow drivers to submit batchbuffers directly to hardware, relying * on the security mechanisms provided by hardware. @@ -431,6 +435,11 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_HAS_EXEC_BATCH_FIRST 48 +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of + * drm_i915_gem_exec_fence structures. See I915_EXEC_FENCE_ARRAY. + */ +#define I915_PARAM_HAS_EXEC_FENCE_ARRAY 49 + typedef struct drm_i915_getparam { __s32 param; /* @@ -812,6 +821,17 @@ struct drm_i915_gem_exec_object2 { __u64 rsvd2; }; +struct drm_i915_gem_exec_fence { + /** + * User's handle for a drm_syncobj to wait on or signal. + */ + __u32 handle; + +#define I915_EXEC_FENCE_WAIT (1<<0) +#define I915_EXEC_FENCE_SIGNAL (1<<1) + __u32 flags; +}; + struct drm_i915_gem_execbuffer2 { /** * List of gem_exec_object2 structs @@ -826,7 +846,11 @@ struct drm_i915_gem_execbuffer2 { __u32 DR1; __u32 DR4; __u32 num_cliprects; - /** This is a struct drm_clip_rect *cliprects */ + /** + * This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY + * is not set. If I915_EXEC_FENCE_ARRAY is set, then this is a + * struct drm_i915_gem_exec_fence *fences. + */ __u64 cliprects_ptr; #define I915_EXEC_RING_MASK (7<<0) #define I915_EXEC_DEFAULT (0<<0) @@ -927,7 +951,14 @@ struct drm_i915_gem_execbuffer2 { * element). */ #define I915_EXEC_BATCH_FIRST (1<<18) -#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_BATCH_FIRST<<1)) + +/* Setting I915_FENCE_ARRAY implies that num_cliprects and cliprects_ptr + * define an array of i915_gem_exec_fence structures which specify a set of + * dma fences to wait upon or signal. + */ +#define I915_EXEC_FENCE_ARRAY (1<<19) + +#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1)) #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ @@ -1467,6 +1498,22 @@ enum drm_i915_perf_record_type { DRM_I915_PERF_RECORD_MAX /* non-ABI */ }; +/** + * Structure to upload perf dynamic configuration into the kernel. + */ +struct drm_i915_perf_oa_config { + /** String formatted like "%08x-%04x-%04x-%04x-%012x" */ + char uuid[36]; + + __u32 n_mux_regs; + __u32 n_boolean_regs; + __u32 n_flex_regs; + + __u64 __user mux_regs_ptr; + __u64 __user boolean_regs_ptr; + __u64 __user flex_regs_ptr; +}; + #if defined(__cplusplus) } #endif diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e99e3e6f8b37..43ab5c402f98 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -30,9 +30,14 @@ #define BPF_FROM_LE BPF_TO_LE #define BPF_FROM_BE BPF_TO_BE +/* jmp encodings */ #define BPF_JNE 0x50 /* jump != */ +#define BPF_JLT 0xa0 /* LT is unsigned, '<' */ +#define BPF_JLE 0xb0 /* LE is unsigned, '<=' */ #define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */ #define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ +#define BPF_JSLT 0xc0 /* SLT is signed, '<' */ +#define BPF_JSLE 0xd0 /* SLE is signed, '<=' */ #define BPF_CALL 0x80 /* function call */ #define BPF_EXIT 0x90 /* function return */ @@ -104,6 +109,8 @@ enum bpf_map_type { BPF_MAP_TYPE_LPM_TRIE, BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH_OF_MAPS, + BPF_MAP_TYPE_DEVMAP, + BPF_MAP_TYPE_SOCKMAP, }; enum bpf_prog_type { @@ -121,6 +128,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_LWT_OUT, BPF_PROG_TYPE_LWT_XMIT, BPF_PROG_TYPE_SOCK_OPS, + BPF_PROG_TYPE_SK_SKB, }; enum bpf_attach_type { @@ -128,6 +136,8 @@ enum bpf_attach_type { BPF_CGROUP_INET_EGRESS, BPF_CGROUP_INET_SOCK_CREATE, BPF_CGROUP_SOCK_OPS, + BPF_SK_SKB_STREAM_PARSER, + BPF_SK_SKB_STREAM_VERDICT, __MAX_BPF_ATTACH_TYPE }; @@ -153,6 +163,7 @@ enum bpf_attach_type { #define BPF_NOEXIST 1 /* create new element if it didn't exist */ #define BPF_EXIST 2 /* update existing element */ +/* flags for BPF_MAP_CREATE command */ #define BPF_F_NO_PREALLOC (1U << 0) /* Instead of having one common LRU list in the * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list @@ -161,6 +172,8 @@ enum bpf_attach_type { * across different LRU lists. */ #define BPF_F_NO_COMMON_LRU (1U << 1) +/* Specify numa node during map creation */ +#define BPF_F_NUMA_NODE (1U << 2) union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ @@ -168,8 +181,13 @@ union bpf_attr { __u32 key_size; /* size of key in bytes */ __u32 value_size; /* size of value in bytes */ __u32 max_entries; /* max number of entries in a map */ - __u32 map_flags; /* prealloc or not */ + __u32 map_flags; /* BPF_MAP_CREATE related + * flags defined above. + */ __u32 inner_map_fd; /* fd pointing to the inner map */ + __u32 numa_node; /* numa node (effective only if + * BPF_F_NUMA_NODE is set). + */ }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -344,9 +362,20 @@ union bpf_attr { * int bpf_redirect(ifindex, flags) * redirect to another netdev * @ifindex: ifindex of the net device - * @flags: bit 0 - if set, redirect to ingress instead of egress - * other bits - reserved - * Return: TC_ACT_REDIRECT + * @flags: + * cls_bpf: + * bit 0 - if set, redirect to ingress instead of egress + * other bits - reserved + * xdp_bpf: + * all bits - reserved + * Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error + * xdp_bfp: XDP_REDIRECT on success or XDP_ABORT on error + * int bpf_redirect_map(map, key, flags) + * redirect to endpoint in map + * @map: pointer to dev map + * @key: index in map to lookup + * @flags: -- + * Return: XDP_REDIRECT on success or XDP_ABORT on error * * u32 bpf_get_route_realm(skb) * retrieve a dst's tclassid @@ -539,6 +568,20 @@ union bpf_attr { * @mode: operation mode (enum bpf_adj_room_mode) * @flags: reserved for future use * Return: 0 on success or negative error code + * + * int bpf_sk_redirect_map(map, key, flags) + * Redirect skb to a sock in map using key as a lookup key for the + * sock in map. + * @map: pointer to sockmap + * @key: key to lookup sock in map + * @flags: reserved for future use + * Return: SK_REDIRECT + * + * int bpf_sock_map_update(skops, map, key, flags) + * @skops: pointer to bpf_sock_ops + * @map: pointer to sockmap to update + * @key: key to insert/update sock in map + * @flags: same flags as map update elem */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -591,7 +634,10 @@ union bpf_attr { FN(get_socket_uid), \ FN(set_hash), \ FN(setsockopt), \ - FN(skb_adjust_room), + FN(skb_adjust_room), \ + FN(redirect_map), \ + FN(sk_redirect_map), \ + FN(sock_map_update), \ /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -668,6 +714,15 @@ struct __sk_buff { __u32 data; __u32 data_end; __u32 napi_id; + + /* accessed by BPF_PROG_TYPE_sk_skb types */ + __u32 family; + __u32 remote_ip4; /* Stored in network byte order */ + __u32 local_ip4; /* Stored in network byte order */ + __u32 remote_ip6[4]; /* Stored in network byte order */ + __u32 local_ip6[4]; /* Stored in network byte order */ + __u32 remote_port; /* Stored in network byte order */ + __u32 local_port; /* stored in host byte order */ }; struct bpf_tunnel_key { @@ -703,20 +758,23 @@ struct bpf_sock { __u32 family; __u32 type; __u32 protocol; + __u32 mark; + __u32 priority; }; #define XDP_PACKET_HEADROOM 256 /* User return codes for XDP prog type. * A valid XDP program must return one of these defined values. All other - * return codes are reserved for future use. Unknown return codes will result - * in packet drop. + * return codes are reserved for future use. Unknown return codes will + * result in packet drops and a warning via bpf_warn_invalid_xdp_action(). */ enum xdp_action { XDP_ABORTED = 0, XDP_DROP, XDP_PASS, XDP_TX, + XDP_REDIRECT, }; /* user accessible metadata for XDP packet hook @@ -727,6 +785,12 @@ struct xdp_md { __u32 data_end; }; +enum sk_action { + SK_ABORTED = 0, + SK_DROP, + SK_REDIRECT, +}; + #define BPF_TAG_SIZE 8 struct bpf_prog_info { diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 6cd63c18708a..838887587411 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -711,7 +711,8 @@ struct kvm_ppc_one_seg_page_size { struct kvm_ppc_smmu_info { __u64 flags; __u32 slb_size; - __u32 pad; + __u16 data_keys; /* # storage keys supported for data */ + __u16 instr_keys; /* # storage keys supported for instructions */ struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; }; diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h index 81d8edf11789..a937480d7cd3 100644 --- a/tools/include/uapi/linux/mman.h +++ b/tools/include/uapi/linux/mman.h @@ -1,7 +1,8 @@ #ifndef _UAPI_LINUX_MMAN_H #define _UAPI_LINUX_MMAN_H -#include <uapi/asm/mman.h> +#include <asm/mman.h> +#include <asm-generic/hugetlb_encode.h> #define MREMAP_MAYMOVE 1 #define MREMAP_FIXED 2 @@ -10,4 +11,25 @@ #define OVERCOMMIT_ALWAYS 1 #define OVERCOMMIT_NEVER 2 +/* + * Huge page size encoding when MAP_HUGETLB is specified, and a huge page + * size other than the default is desired. See hugetlb_encode.h. + * All known huge page size encodings are provided here. It is the + * responsibility of the application to know which sizes are supported on + * the running system. See mmap(2) man page for details. + */ +#define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT +#define MAP_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK + +#define MAP_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB +#define MAP_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB +#define MAP_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB +#define MAP_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB +#define MAP_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB +#define MAP_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB +#define MAP_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB +#define MAP_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB +#define MAP_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB +#define MAP_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB + #endif /* _UAPI_LINUX_MMAN_H */ diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 2a37ae925d85..140ae638cfd6 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -139,8 +139,9 @@ enum perf_event_sample_format { PERF_SAMPLE_IDENTIFIER = 1U << 16, PERF_SAMPLE_TRANSACTION = 1U << 17, PERF_SAMPLE_REGS_INTR = 1U << 18, + PERF_SAMPLE_PHYS_ADDR = 1U << 19, - PERF_SAMPLE_MAX = 1U << 19, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 20, /* non-ABI */ }; /* @@ -814,6 +815,7 @@ enum perf_event_type { * { u64 transaction; } && PERF_SAMPLE_TRANSACTION * { u64 abi; # enum perf_sample_regs_abi * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR + * { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR * }; */ PERF_RECORD_SAMPLE = 9, diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index 4563ba7ede6f..1e83e3c07448 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -17,13 +17,19 @@ MAKEFLAGS += --no-print-directory LIBFILE = $(OUTPUT)libapi.a CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) -CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -fPIC +CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -U_FORTIFY_SOURCE -fPIC +ifeq ($(DEBUG),0) ifeq ($(CC_NO_CLANG), 0) CFLAGS += -O3 else CFLAGS += -O6 endif +endif + +ifeq ($(DEBUG),0) + CFLAGS += -D_FORTIFY_SOURCE +endif # Treat warnings as errors unless directed not to ifneq ($(WERROR),0) diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 4ed0257dc1f3..d2441db34740 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -189,6 +189,10 @@ install_lib: all_cmd $(call QUIET_INSTALL, $(LIB_FILE)) \ $(call do_install,$(LIB_FILE),$(libdir_SQ)) +install_headers: + $(call QUIET_INSTALL, headers) \ + $(call do_install,bpf.h,$(prefix)/include/bpf,644) + install: install_lib ### Cleaning rules diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index e5bbb090bf88..1d6907d379c9 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -57,8 +57,9 @@ static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, return syscall(__NR_bpf, cmd, attr, size); } -int bpf_create_map(enum bpf_map_type map_type, int key_size, - int value_size, int max_entries, __u32 map_flags) +int bpf_create_map_node(enum bpf_map_type map_type, int key_size, + int value_size, int max_entries, __u32 map_flags, + int node) { union bpf_attr attr; @@ -69,12 +70,24 @@ int bpf_create_map(enum bpf_map_type map_type, int key_size, attr.value_size = value_size; attr.max_entries = max_entries; attr.map_flags = map_flags; + if (node >= 0) { + attr.map_flags |= BPF_F_NUMA_NODE; + attr.numa_node = node; + } return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); } -int bpf_create_map_in_map(enum bpf_map_type map_type, int key_size, - int inner_map_fd, int max_entries, __u32 map_flags) +int bpf_create_map(enum bpf_map_type map_type, int key_size, + int value_size, int max_entries, __u32 map_flags) +{ + return bpf_create_map_node(map_type, key_size, value_size, + max_entries, map_flags, -1); +} + +int bpf_create_map_in_map_node(enum bpf_map_type map_type, int key_size, + int inner_map_fd, int max_entries, + __u32 map_flags, int node) { union bpf_attr attr; @@ -86,10 +99,21 @@ int bpf_create_map_in_map(enum bpf_map_type map_type, int key_size, attr.inner_map_fd = inner_map_fd; attr.max_entries = max_entries; attr.map_flags = map_flags; + if (node >= 0) { + attr.map_flags |= BPF_F_NUMA_NODE; + attr.numa_node = node; + } return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); } +int bpf_create_map_in_map(enum bpf_map_type map_type, int key_size, + int inner_map_fd, int max_entries, __u32 map_flags) +{ + return bpf_create_map_in_map_node(map_type, key_size, inner_map_fd, + max_entries, map_flags, -1); +} + int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, size_t insns_cnt, const char *license, __u32 kern_version, char *log_buf, size_t log_buf_sz) diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 418c86e69bcb..b8ea5843c39e 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -24,8 +24,14 @@ #include <linux/bpf.h> #include <stddef.h> +int bpf_create_map_node(enum bpf_map_type map_type, int key_size, + int value_size, int max_entries, __u32 map_flags, + int node); int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size, int max_entries, __u32 map_flags); +int bpf_create_map_in_map_node(enum bpf_map_type map_type, int key_size, + int inner_map_fd, int max_entries, + __u32 map_flags, int node); int bpf_create_map_in_map(enum bpf_map_type map_type, int key_size, int inner_map_fd, int max_entries, __u32 map_flags); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 8c67a90dbd82..35f6dfcdc565 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1745,3 +1745,32 @@ long libbpf_get_error(const void *ptr) return PTR_ERR(ptr); return 0; } + +int bpf_prog_load(const char *file, enum bpf_prog_type type, + struct bpf_object **pobj, int *prog_fd) +{ + struct bpf_program *prog; + struct bpf_object *obj; + int err; + + obj = bpf_object__open(file); + if (IS_ERR(obj)) + return -ENOENT; + + prog = bpf_program__next(NULL, obj); + if (!prog) { + bpf_object__close(obj); + return -ENOENT; + } + + bpf_program__set_type(prog, type); + err = bpf_object__load(obj); + if (err) { + bpf_object__close(obj); + return -EINVAL; + } + + *pobj = obj; + *prog_fd = bpf_program__fd(prog); + return 0; +} diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 32c7252f734e..7959086eb9c9 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -243,4 +243,6 @@ int bpf_map__pin(struct bpf_map *map, const char *path); long libbpf_get_error(const void *ptr); +int bpf_prog_load(const char *file, enum bpf_prog_type type, + struct bpf_object **pobj, int *prog_fd); #endif diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt index 6a1af43862df..3995735a878f 100644 --- a/tools/objtool/Documentation/stack-validation.txt +++ b/tools/objtool/Documentation/stack-validation.txt @@ -194,10 +194,10 @@ they mean, and suggestions for how to fix them. If it's a GCC-compiled .c file, the error may be because the function uses an inline asm() statement which has a "call" instruction. An asm() statement with a call instruction must declare the use of the - stack pointer in its output operand. For example, on x86_64: + stack pointer in its output operand. On x86_64, this means adding + the ASM_CALL_CONSTRAINT as an output constraint: - register void *__sp asm("rsp"); - asm volatile("call func" : "+r" (__sp)); + asm volatile("call func" : ASM_CALL_CONSTRAINT); Otherwise the stack frame may not get created before the call. diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 0e8c8ec4fd4e..34a579f806e3 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -208,14 +208,14 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, break; case 0x89: - if (rex == 0x48 && modrm == 0xe5) { + if (rex_w && !rex_r && modrm_mod == 3 && modrm_reg == 4) { - /* mov %rsp, %rbp */ + /* mov %rsp, reg */ *type = INSN_STACK; op->src.type = OP_SRC_REG; op->src.reg = CFI_SP; op->dest.type = OP_DEST_REG; - op->dest.reg = CFI_BP; + op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b]; break; } @@ -284,11 +284,16 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, case 0x8d: if (sib == 0x24 && rex_w && !rex_b && !rex_x) { - /* lea disp(%rsp), reg */ *type = INSN_STACK; - op->src.type = OP_SRC_ADD; + if (!insn.displacement.value) { + /* lea (%rsp), reg */ + op->src.type = OP_SRC_REG; + } else { + /* lea disp(%rsp), reg */ + op->src.type = OP_SRC_ADD; + op->src.offset = insn.displacement.value; + } op->src.reg = CFI_SP; - op->src.offset = insn.displacement.value; op->dest.type = OP_DEST_REG; op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r]; diff --git a/tools/objtool/check.c b/tools/objtool/check.c index f744617c9946..a0c518ecf085 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1203,24 +1203,39 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state) switch (op->src.type) { case OP_SRC_REG: - if (op->src.reg == CFI_SP && op->dest.reg == CFI_BP) { + if (op->src.reg == CFI_SP && op->dest.reg == CFI_BP && + cfa->base == CFI_SP && + regs[CFI_BP].base == CFI_CFA && + regs[CFI_BP].offset == -cfa->offset) { + + /* mov %rsp, %rbp */ + cfa->base = op->dest.reg; + state->bp_scratch = false; + } - if (cfa->base == CFI_SP && - regs[CFI_BP].base == CFI_CFA && - regs[CFI_BP].offset == -cfa->offset) { + else if (op->src.reg == CFI_SP && + op->dest.reg == CFI_BP && state->drap) { - /* mov %rsp, %rbp */ - cfa->base = op->dest.reg; - state->bp_scratch = false; - } + /* drap: mov %rsp, %rbp */ + regs[CFI_BP].base = CFI_BP; + regs[CFI_BP].offset = -state->stack_size; + state->bp_scratch = false; + } - else if (state->drap) { + else if (op->src.reg == CFI_SP && cfa->base == CFI_SP) { - /* drap: mov %rsp, %rbp */ - regs[CFI_BP].base = CFI_BP; - regs[CFI_BP].offset = -state->stack_size; - state->bp_scratch = false; - } + /* + * mov %rsp, %reg + * + * This is needed for the rare case where GCC + * does: + * + * mov %rsp, %rax + * ... + * mov %rax, %rsp + */ + state->vals[op->dest.reg].base = CFI_CFA; + state->vals[op->dest.reg].offset = -state->stack_size; } else if (op->dest.reg == cfa->base) { diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 6e9f980a7d26..24460155c82c 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -175,19 +175,20 @@ static int read_sections(struct elf *elf) return -1; } - sec->data = elf_getdata(s, NULL); - if (!sec->data) { - WARN_ELF("elf_getdata"); - return -1; - } - - if (sec->data->d_off != 0 || - sec->data->d_size != sec->sh.sh_size) { - WARN("unexpected data attributes for %s", sec->name); - return -1; + if (sec->sh.sh_size != 0) { + sec->data = elf_getdata(s, NULL); + if (!sec->data) { + WARN_ELF("elf_getdata"); + return -1; + } + if (sec->data->d_off != 0 || + sec->data->d_size != sec->sh.sh_size) { + WARN("unexpected data attributes for %s", + sec->name); + return -1; + } } - - sec->len = sec->data->d_size; + sec->len = sec->sh.sh_size; } /* sanity check, one more call to elf_nextscn() should return NULL */ @@ -508,6 +509,7 @@ struct section *elf_create_rela_section(struct elf *elf, struct section *base) strcat(relaname, base->name); sec = elf_create_section(elf, relaname, sizeof(GElf_Rela), 0); + free(relaname); if (!sec) return NULL; @@ -561,6 +563,7 @@ int elf_write(struct elf *elf) struct section *sec; Elf_Scn *s; + /* Update section headers for changed sections: */ list_for_each_entry(sec, &elf->sections, list) { if (sec->changed) { s = elf_getscn(elf->elf, sec->idx); @@ -568,13 +571,17 @@ int elf_write(struct elf *elf) WARN_ELF("elf_getscn"); return -1; } - if (!gelf_update_shdr (s, &sec->sh)) { + if (!gelf_update_shdr(s, &sec->sh)) { WARN_ELF("gelf_update_shdr"); return -1; } } } + /* Make sure the new section header entries get updated properly. */ + elf_flagelf(elf->elf, ELF_C_SET, ELF_F_DIRTY); + + /* Write all changes to the file. */ if (elf_update(elf->elf, ELF_C_WRITE) < 0) { WARN_ELF("elf_update"); return -1; diff --git a/tools/pci/pcitest.c b/tools/pci/pcitest.c index ad54a58d7dda..9074b477bff0 100644 --- a/tools/pci/pcitest.c +++ b/tools/pci/pcitest.c @@ -173,6 +173,7 @@ usage: "\t-D <dev> PCI endpoint test device {default: /dev/pci-endpoint-test.0}\n" "\t-b <bar num> BAR test (bar number between 0..5)\n" "\t-m <msi num> MSI test (msi number between 1..32)\n" + "\t-l Legacy IRQ test\n" "\t-r Read buffer test\n" "\t-w Write buffer test\n" "\t-c Copy buffer test\n" diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt index ab1b0825130a..76971d2e4164 100644 --- a/tools/perf/Documentation/intel-pt.txt +++ b/tools/perf/Documentation/intel-pt.txt @@ -873,7 +873,7 @@ amended to take the number of elements as a parameter. $ cat ~/.perfconfig [intel-pt] - mispred-all + mispred-all = on $ perf record -e intel_pt//u ./sort 3000 Bubble sorting array of 3000 elements diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt index 73496320fca3..4be08a1e3f8d 100644 --- a/tools/perf/Documentation/perf-mem.txt +++ b/tools/perf/Documentation/perf-mem.txt @@ -59,6 +59,10 @@ OPTIONS --ldload:: Specify desired latency for loads event. +-p:: +--phys-data:: + Record/Report sample physical addresses + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1] diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 9bdea047c5db..e397453e5a46 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -249,7 +249,10 @@ OPTIONS -d:: --data:: - Record the sample addresses. + Record the sample virtual addresses. + +--phys-data:: + Record the sample physical addresses. -T:: --timestamp:: diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 9fa84617181e..383a98d992ed 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -137,6 +137,7 @@ OPTIONS - mem: type of memory access for the data at the time of the sample - snoop: type of snoop (if any) for the data at the time of the sample - dcacheline: the cacheline the data address is on at the time of the sample + - phys_daddr: physical address of data being executed on at the time of sample And the default sort keys are changed to local_weight, mem, sym, dso, symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'. diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 5ee8796be96e..18dfcfa38454 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -117,7 +117,7 @@ OPTIONS Comma separated list of fields to print. Options are: comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn, brstackoff, - callindent, insn, insnlen, synth. + callindent, insn, insnlen, synth, phys_addr. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index c1e3288a2dfb..d53bea6bd571 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -37,7 +37,7 @@ OPTIONS --expr:: --event:: List of syscalls and other perf events (tracepoints, HW cache events, - etc) to show. + etc) to show. Globbing is supported, e.g.: "epoll_*", "*msg*", etc. See 'perf list' for a complete list of events. Prefixing with ! shows all syscalls but the ones specified. You may need to escape it. diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 62072822dc85..627b7cada144 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -1,34 +1,8 @@ tools/perf -tools/arch/alpha/include/asm/barrier.h -tools/arch/arm/include/asm/barrier.h -tools/arch/arm64/include/asm/barrier.h -tools/arch/ia64/include/asm/barrier.h -tools/arch/mips/include/asm/barrier.h -tools/arch/powerpc/include/asm/barrier.h -tools/arch/s390/include/asm/barrier.h -tools/arch/sh/include/asm/barrier.h -tools/arch/sparc/include/asm/barrier.h -tools/arch/sparc/include/asm/barrier_32.h -tools/arch/sparc/include/asm/barrier_64.h -tools/arch/tile/include/asm/barrier.h -tools/arch/x86/include/asm/barrier.h -tools/arch/x86/include/asm/cmpxchg.h -tools/arch/x86/include/asm/cpufeatures.h -tools/arch/x86/include/asm/disabled-features.h -tools/arch/x86/include/asm/required-features.h -tools/arch/x86/include/uapi/asm/svm.h -tools/arch/x86/include/uapi/asm/vmx.h -tools/arch/x86/include/uapi/asm/kvm.h -tools/arch/x86/include/uapi/asm/kvm_perf.h -tools/arch/x86/lib/memcpy_64.S -tools/arch/x86/lib/memset_64.S -tools/arch/s390/include/uapi/asm/kvm_perf.h -tools/arch/s390/include/uapi/asm/sie.h -tools/arch/xtensa/include/asm/barrier.h +tools/arch tools/scripts tools/build -tools/arch/x86/include/asm/atomic.h -tools/arch/x86/include/asm/rmwcc.h +tools/include tools/lib/traceevent tools/lib/api tools/lib/bpf @@ -42,60 +16,3 @@ tools/lib/find_bit.c tools/lib/bitmap.c tools/lib/str_error_r.c tools/lib/vsprintf.c -tools/include/asm/alternative-asm.h -tools/include/asm/atomic.h -tools/include/asm/barrier.h -tools/include/asm/bug.h -tools/include/asm-generic/atomic-gcc.h -tools/include/asm-generic/barrier.h -tools/include/asm-generic/bitops/arch_hweight.h -tools/include/asm-generic/bitops/atomic.h -tools/include/asm-generic/bitops/const_hweight.h -tools/include/asm-generic/bitops/__ffs.h -tools/include/asm-generic/bitops/__ffz.h -tools/include/asm-generic/bitops/__fls.h -tools/include/asm-generic/bitops/find.h -tools/include/asm-generic/bitops/fls64.h -tools/include/asm-generic/bitops/fls.h -tools/include/asm-generic/bitops/hweight.h -tools/include/asm-generic/bitops.h -tools/include/linux/atomic.h -tools/include/linux/bitops.h -tools/include/linux/compiler.h -tools/include/linux/compiler-gcc.h -tools/include/linux/coresight-pmu.h -tools/include/linux/bug.h -tools/include/linux/filter.h -tools/include/linux/hash.h -tools/include/linux/kernel.h -tools/include/linux/list.h -tools/include/linux/log2.h -tools/include/uapi/asm-generic/fcntl.h -tools/include/uapi/asm-generic/ioctls.h -tools/include/uapi/asm-generic/mman-common.h -tools/include/uapi/asm-generic/mman.h -tools/include/uapi/drm/drm.h -tools/include/uapi/drm/i915_drm.h -tools/include/uapi/linux/bpf.h -tools/include/uapi/linux/bpf_common.h -tools/include/uapi/linux/fcntl.h -tools/include/uapi/linux/hw_breakpoint.h -tools/include/uapi/linux/kvm.h -tools/include/uapi/linux/mman.h -tools/include/uapi/linux/perf_event.h -tools/include/uapi/linux/sched.h -tools/include/uapi/linux/stat.h -tools/include/uapi/linux/vhost.h -tools/include/uapi/sound/asound.h -tools/include/linux/poison.h -tools/include/linux/rbtree.h -tools/include/linux/rbtree_augmented.h -tools/include/linux/refcount.h -tools/include/linux/string.h -tools/include/linux/stringify.h -tools/include/linux/types.h -tools/include/linux/err.h -tools/include/linux/bitmap.h -tools/include/linux/time64.h -tools/arch/*/include/uapi/asm/mman.h -tools/arch/*/include/uapi/asm/perf_regs.h diff --git a/tools/perf/arch/s390/util/Build b/tools/perf/arch/s390/util/Build index bd518b623d7a..5bd7b9260cc0 100644 --- a/tools/perf/arch/s390/util/Build +++ b/tools/perf/arch/s390/util/Build @@ -1,5 +1,4 @@ libperf-y += header.o -libperf-y += sym-handling.o libperf-y += kvm-stat.o libperf-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/s390/util/sym-handling.c b/tools/perf/arch/s390/util/sym-handling.c deleted file mode 100644 index e103f6e46afe..000000000000 --- a/tools/perf/arch/s390/util/sym-handling.c +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Architecture specific ELF symbol handling and relocation mapping. - * - * Copyright 2017 IBM Corp. - * Author(s): Thomas Richter <tmricht@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - */ - -#include "symbol.h" - -#ifdef HAVE_LIBELF_SUPPORT -bool elf__needs_adjust_symbols(GElf_Ehdr ehdr) -{ - if (ehdr.e_type == ET_EXEC) - return false; - return ehdr.e_type == ET_REL || ehdr.e_type == ET_DYN; -} - -void arch__adjust_sym_map_offset(GElf_Sym *sym, - GElf_Shdr *shdr __maybe_unused, - struct map *map) -{ - if (map->type == MAP__FUNCTION) - sym->st_value += map->start; -} -#endif diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c index 3ddcc6e2abeb..a1d82e33282c 100644 --- a/tools/perf/builtin-config.c +++ b/tools/perf/builtin-config.c @@ -59,7 +59,7 @@ static int set_config(struct perf_config_set *set, const char *file_name, fprintf(fp, "[%s]\n", section->name); perf_config_items__for_each_entry(§ion->items, item) { - if (!use_system_config && section->from_system_config) + if (!use_system_config && item->from_system_config) continue; if (item->value) fprintf(fp, "\t%s = %s\n", diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index a1497c516d85..24ee68ecdd42 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -627,7 +627,6 @@ static const struct { { "GFP_HIGHUSER_MOVABLE", "HUM" }, { "GFP_HIGHUSER", "HU" }, { "GFP_USER", "U" }, - { "GFP_TEMPORARY", "TMP" }, { "GFP_KERNEL_ACCOUNT", "KAC" }, { "GFP_KERNEL", "K" }, { "GFP_NOFS", "NF" }, diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index e001c0290793..0f15634ef82c 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -23,6 +23,7 @@ struct perf_mem { bool hide_unresolved; bool dump_raw; bool force; + bool phys_addr; int operation; const char *cpu_list; DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); @@ -101,6 +102,9 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) rec_argv[i++] = "-d"; + if (mem->phys_addr) + rec_argv[i++] = "--phys-data"; + for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { if (!perf_mem_events[j].record) continue; @@ -161,30 +165,60 @@ dump_raw_samples(struct perf_tool *tool, if (al.map != NULL) al.map->dso->hit = 1; - if (symbol_conf.field_sep) { - fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s%"PRIu64 - "%s0x%"PRIx64"%s%s:%s\n"; + if (mem->phys_addr) { + if (symbol_conf.field_sep) { + fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s0x%016"PRIx64 + "%s%"PRIu64"%s0x%"PRIx64"%s%s:%s\n"; + } else { + fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64 + "%s0x%016"PRIx64"%s%5"PRIu64"%s0x%06"PRIx64 + "%s%s:%s\n"; + symbol_conf.field_sep = " "; + } + + printf(fmt, + sample->pid, + symbol_conf.field_sep, + sample->tid, + symbol_conf.field_sep, + sample->ip, + symbol_conf.field_sep, + sample->addr, + symbol_conf.field_sep, + sample->phys_addr, + symbol_conf.field_sep, + sample->weight, + symbol_conf.field_sep, + sample->data_src, + symbol_conf.field_sep, + al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???", + al.sym ? al.sym->name : "???"); } else { - fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64 - "%s%5"PRIu64"%s0x%06"PRIx64"%s%s:%s\n"; - symbol_conf.field_sep = " "; - } + if (symbol_conf.field_sep) { + fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s%"PRIu64 + "%s0x%"PRIx64"%s%s:%s\n"; + } else { + fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64 + "%s%5"PRIu64"%s0x%06"PRIx64"%s%s:%s\n"; + symbol_conf.field_sep = " "; + } - printf(fmt, - sample->pid, - symbol_conf.field_sep, - sample->tid, - symbol_conf.field_sep, - sample->ip, - symbol_conf.field_sep, - sample->addr, - symbol_conf.field_sep, - sample->weight, - symbol_conf.field_sep, - sample->data_src, - symbol_conf.field_sep, - al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???", - al.sym ? al.sym->name : "???"); + printf(fmt, + sample->pid, + symbol_conf.field_sep, + sample->tid, + symbol_conf.field_sep, + sample->ip, + symbol_conf.field_sep, + sample->addr, + symbol_conf.field_sep, + sample->weight, + symbol_conf.field_sep, + sample->data_src, + symbol_conf.field_sep, + al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???", + al.sym ? al.sym->name : "???"); + } out_put: addr_location__put(&al); return 0; @@ -224,7 +258,10 @@ static int report_raw_events(struct perf_mem *mem) if (ret < 0) goto out_delete; - printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n"); + if (mem->phys_addr) + printf("# PID, TID, IP, ADDR, PHYS ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n"); + else + printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n"); ret = perf_session__process_events(session); @@ -254,9 +291,16 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem) * there is no weight (cost) associated with stores, so don't print * the column */ - if (!(mem->operation & MEM_OPERATION_LOAD)) - rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr," - "dso_daddr,tlb,locked"; + if (!(mem->operation & MEM_OPERATION_LOAD)) { + if (mem->phys_addr) + rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr," + "dso_daddr,tlb,locked,phys_daddr"; + else + rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr," + "dso_daddr,tlb,locked"; + } else if (mem->phys_addr) + rep_argv[i++] = "--sort=local_weight,mem,sym,dso,symbol_daddr," + "dso_daddr,snoop,tlb,locked,phys_daddr"; for (j = 1; j < argc; j++, i++) rep_argv[i] = argv[j]; @@ -373,6 +417,7 @@ int cmd_mem(int argc, const char **argv) "separator for columns, no spaces will be added" " between columns '.' is reserved."), OPT_BOOLEAN('f', "force", &mem.force, "don't complain, do it"), + OPT_BOOLEAN('p', "phys-data", &mem.phys_addr, "Record/Report sample physical addresses"), OPT_END() }; const char *const mem_subcommands[] = { "record", "report", NULL }; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 36d7117a7562..56f8142ff97f 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1604,6 +1604,8 @@ static struct option __record_options[] = { OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, "per thread counts"), OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), + OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr, + "Record the sample physical addresses"), OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"), OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, &record.opts.sample_time_set, diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 378f76cdf923..3d4c3b5e1868 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -87,6 +87,7 @@ enum perf_output_field { PERF_OUTPUT_BRSTACKINSN = 1U << 23, PERF_OUTPUT_BRSTACKOFF = 1U << 24, PERF_OUTPUT_SYNTH = 1U << 25, + PERF_OUTPUT_PHYS_ADDR = 1U << 26, }; struct output_option { @@ -119,6 +120,7 @@ struct output_option { {.str = "brstackinsn", .field = PERF_OUTPUT_BRSTACKINSN}, {.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF}, {.str = "synth", .field = PERF_OUTPUT_SYNTH}, + {.str = "phys_addr", .field = PERF_OUTPUT_PHYS_ADDR}, }; enum { @@ -175,7 +177,8 @@ static struct { PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP | PERF_OUTPUT_SYM | PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD | PERF_OUTPUT_ADDR | - PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT, + PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT | + PERF_OUTPUT_PHYS_ADDR, .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT, }, @@ -382,6 +385,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, PERF_OUTPUT_IREGS)) return -EINVAL; + if (PRINT_FIELD(PHYS_ADDR) && + perf_evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR", + PERF_OUTPUT_PHYS_ADDR)) + return -EINVAL; + return 0; } @@ -1446,6 +1454,9 @@ static void process_event(struct perf_script *script, if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT)) print_sample_bpf_output(sample); print_insn(sample, attr, thread, machine); + + if (PRINT_FIELD(PHYS_ADDR)) + printf("%16" PRIx64, sample->phys_addr); printf("\n"); } @@ -2729,7 +2740,7 @@ int cmd_script(int argc, const char **argv) "Valid types: hw,sw,trace,raw,synth. " "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," "addr,symoff,period,iregs,brstack,brstacksym,flags," - "bpf-output,callindent,insn,insnlen,brstackinsn,synth", + "bpf-output,callindent,insn,insnlen,brstackinsn,synth,phys_addr", parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 866da7aa54bf..69523ed55894 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -707,7 +707,7 @@ try_again: process_interval(); } } - wait(&status); + waitpid(child_pid, &status, 0); if (workload_exec_errno) { const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); @@ -1257,7 +1257,7 @@ static bool collect_data(struct perf_evsel *counter, if (counter->merged_stat) return false; cb(counter, data, true); - if (!no_merge) + if (!no_merge && counter->auto_merge_stats) collect_all_aliases(counter, cb, data); return true; } diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index d59cdadf3a79..771ddab94bb0 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1261,6 +1261,7 @@ static int trace__read_syscall_info(struct trace *trace, int id) static int trace__validate_ev_qualifier(struct trace *trace) { int err = 0, i; + size_t nr_allocated; struct str_node *pos; trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier); @@ -1274,13 +1275,18 @@ static int trace__validate_ev_qualifier(struct trace *trace) goto out; } + nr_allocated = trace->ev_qualifier_ids.nr; i = 0; strlist__for_each_entry(pos, trace->ev_qualifier) { const char *sc = pos->s; - int id = syscalltbl__id(trace->sctbl, sc); + int id = syscalltbl__id(trace->sctbl, sc), match_next = -1; if (id < 0) { + id = syscalltbl__strglobmatch_first(trace->sctbl, sc, &match_next); + if (id >= 0) + goto matches; + if (err == 0) { fputs("Error:\tInvalid syscall ", trace->output); err = -EINVAL; @@ -1290,13 +1296,37 @@ static int trace__validate_ev_qualifier(struct trace *trace) fputs(sc, trace->output); } - +matches: trace->ev_qualifier_ids.entries[i++] = id; + if (match_next == -1) + continue; + + while (1) { + id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next); + if (id < 0) + break; + if (nr_allocated == trace->ev_qualifier_ids.nr) { + void *entries; + + nr_allocated += 8; + entries = realloc(trace->ev_qualifier_ids.entries, + nr_allocated * sizeof(trace->ev_qualifier_ids.entries[0])); + if (entries == NULL) { + err = -ENOMEM; + fputs("\nError:\t Not enough memory for parsing\n", trace->output); + goto out_free; + } + trace->ev_qualifier_ids.entries = entries; + } + trace->ev_qualifier_ids.nr++; + trace->ev_qualifier_ids.entries[i++] = id; + } } if (err < 0) { fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'" "\nHint:\tand: 'man syscalls'\n", trace->output); +out_free: zfree(&trace->ev_qualifier_ids.entries); trace->ev_qualifier_ids.nr = 0; } @@ -2814,7 +2844,7 @@ static int trace__parse_events_option(const struct option *opt, const char *str, struct trace *trace = (struct trace *)opt->value; const char *s = str; char *sep = NULL, *lists[2] = { NULL, NULL, }; - int len = strlen(str) + 1, err = -1, list; + int len = strlen(str) + 1, err = -1, list, idx; char *strace_groups_dir = system_path(STRACE_GROUPS_DIR); char group_name[PATH_MAX]; @@ -2831,7 +2861,8 @@ static int trace__parse_events_option(const struct option *opt, const char *str, *sep = '\0'; list = 0; - if (syscalltbl__id(trace->sctbl, s) >= 0) { + if (syscalltbl__id(trace->sctbl, s) >= 0 || + syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) { list = 1; } else { path__join(group_name, sizeof(group_name), strace_groups_dir, s); diff --git a/tools/perf/perf.c b/tools/perf/perf.c index e0279babe0c0..2f19e03c5c40 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -467,15 +467,21 @@ int main(int argc, const char **argv) * - cannot execute it externally (since it would just do * the same thing over again) * - * So we just directly call the internal command handler, and - * die if that one cannot handle it. + * So we just directly call the internal command handler. If that one + * fails to handle this, then maybe we just run a renamed perf binary + * that contains a dash in its name. To handle this scenario, we just + * fall through and ignore the "xxxx" part of the command string. */ if (strstarts(cmd, "perf-")) { cmd += 5; argv[0] = cmd; handle_internal_command(argc, argv); - fprintf(stderr, "cannot handle %s internally", cmd); - goto out; + /* + * If the command is handled, the above function does not + * return undo changes and fall through in such a case. + */ + cmd -= 5; + argv[0] = cmd; } if (strstarts(cmd, "trace")) { #ifdef HAVE_LIBAUDIT_SUPPORT diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 2c010dd6a79d..dc442ba21bf6 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -43,6 +43,7 @@ struct record_opts { bool no_samples; bool raw_samples; bool sample_address; + bool sample_phys_addr; bool sample_weight; bool sample_time; bool sample_time_set; diff --git a/tools/perf/pmu-events/arch/powerpc/power9/frontend.json b/tools/perf/pmu-events/arch/powerpc/power9/frontend.json index 7e62c46d7a20..c63a919eda98 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/frontend.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/frontend.json @@ -80,11 +80,6 @@ "BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load." }, {, - "EventCode": "0x400F0", - "EventName": "PM_LD_MISS_L1", - "BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load." - }, - {, "EventCode": "0x2E01A", "EventName": "PM_CMPLU_STALL_LSU_FLUSH_NEXT", "BriefDescription": "Completion stall of one cycle because the LSU requested to flush the next iop in the sequence. It takes 1 cycle for the ISU to process this request before the LSU instruction is allowed to complete" @@ -374,4 +369,4 @@ "EventName": "PM_IPTEG_FROM_L31_ECO_MOD", "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a instruction side request" } -]
\ No newline at end of file +] diff --git a/tools/perf/pmu-events/arch/powerpc/power9/other.json b/tools/perf/pmu-events/arch/powerpc/power9/other.json index 00f3d2a21f31..54cc3be00fc2 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/other.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/other.json @@ -605,11 +605,6 @@ "BriefDescription": "RC retries on PB for any load from core (excludes DCBFs)" }, {, - "EventCode": "0x3689E", - "EventName": "PM_L2_RTY_LD", - "BriefDescription": "RC retries on PB for any load from core (excludes DCBFs)" - }, - {, "EventCode": "0xE08C", "EventName": "PM_LSU0_ERAT_HIT", "BriefDescription": "Primary ERAT hit. There is no secondary ERAT" @@ -715,11 +710,6 @@ "BriefDescription": "Lifetime, sample of RD machine 0 valid" }, {, - "EventCode": "0x468B4", - "EventName": "PM_L3_RD0_BUSY", - "BriefDescription": "Lifetime, sample of RD machine 0 valid" - }, - {, "EventCode": "0x46080", "EventName": "PM_L2_DISP_ALL_L2MISS", "BriefDescription": "All successful Ld/St dispatches for this thread that were an L2 miss (excludes i_l2mru_tch_reqs)" @@ -850,21 +840,11 @@ "BriefDescription": "RC mach 0 Busy. Used by PMU to sample ave RC lifetime (mach0 used as sample point)" }, {, - "EventCode": "0x2608C", - "EventName": "PM_RC0_BUSY", - "BriefDescription": "RC mach 0 Busy. Used by PMU to sample ave RC lifetime (mach0 used as sample point)" - }, - {, "EventCode": "0x36082", "EventName": "PM_L2_LD_DISP", "BriefDescription": "All successful I-or-D side load dispatches for this thread (excludes i_l2mru_tch_reqs)." }, {, - "EventCode": "0x1609E", - "EventName": "PM_L2_LD_DISP", - "BriefDescription": "All successful D side load dispatches for this thread (L2 miss + L2 hits)" - }, - {, "EventCode": "0xF8B0", "EventName": "PM_L3_SW_PREF", "BriefDescription": "L3 load prefetch, sourced from a software prefetch stream, was sent to the nest" @@ -1040,11 +1020,6 @@ "BriefDescription": "L3 castouts in Mepf state for this thread" }, {, - "EventCode": "0x168A0", - "EventName": "PM_L3_CO_MEPF", - "BriefDescription": "L3 CO of line in Mep state (includes casthrough to memory). The Mepf state indicates that a line was brought in to satisfy an L3 prefetch request" - }, - {, "EventCode": "0x460A2", "EventName": "PM_L3_LAT_CI_HIT", "BriefDescription": "L3 Lateral Castins Hit" @@ -1150,11 +1125,6 @@ "BriefDescription": "RC retries on PB for any store from core (excludes DCBFs)" }, {, - "EventCode": "0x4689E", - "EventName": "PM_L2_RTY_ST", - "BriefDescription": "RC retries on PB for any store from core (excludes DCBFs)" - }, - {, "EventCode": "0x24040", "EventName": "PM_INST_FROM_L2_MEPF", "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state. due to an instruction fetch (not prefetch)" @@ -1255,11 +1225,6 @@ "BriefDescription": "CO mach 0 Busy. Used by PMU to sample ave CO lifetime (mach0 used as sample point)" }, {, - "EventCode": "0x4608C", - "EventName": "PM_CO0_BUSY", - "BriefDescription": "CO mach 0 Busy. Used by PMU to sample ave CO lifetime (mach0 used as sample point)" - }, - {, "EventCode": "0x2C122", "EventName": "PM_MRK_DATA_FROM_L3_DISP_CONFLICT_CYC", "BriefDescription": "Duration in cycles to reload from local core's L3 with dispatch conflict due to a marked load" @@ -1395,11 +1360,6 @@ "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a instruction side request" }, {, - "EventCode": "0x40006", - "EventName": "PM_ISLB_MISS", - "BriefDescription": "Number of ISLB misses for this thread" - }, - {, "EventCode": "0xD8A8", "EventName": "PM_ISLB_MISS", "BriefDescription": "Instruction SLB miss - Total of all segment sizes" @@ -1515,11 +1475,6 @@ "BriefDescription": "All successful I-side dispatches for this thread (excludes i_l2mru_tch reqs)." }, {, - "EventCode": "0x3609E", - "EventName": "PM_L2_INST", - "BriefDescription": "All successful I-side dispatches that were an L2 miss for this thread (excludes i_l2mru_tch reqs)" - }, - {, "EventCode": "0x3504C", "EventName": "PM_IPTEG_FROM_DL4", "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to a instruction side request" @@ -1690,11 +1645,6 @@ "BriefDescription": "All successful I-or-D side load dispatches for this thread that were L2 hits (excludes i_l2mru_tch_reqs)" }, {, - "EventCode": "0x2609E", - "EventName": "PM_L2_LD_HIT", - "BriefDescription": "All successful D side load dispatches for this thread that were L2 hits for this thread" - }, - {, "EventCode": "0x168AC", "EventName": "PM_L3_CI_USAGE", "BriefDescription": "Rotating sample of 16 CI or CO actives" @@ -1795,21 +1745,11 @@ "BriefDescription": "Rotating sample of 8 WI valid" }, {, - "EventCode": "0x260B6", - "EventName": "PM_L3_WI0_BUSY", - "BriefDescription": "Rotating sample of 8 WI valid (duplicate)" - }, - {, "EventCode": "0x368AC", "EventName": "PM_L3_CO0_BUSY", "BriefDescription": "Lifetime, sample of CO machine 0 valid" }, {, - "EventCode": "0x468AC", - "EventName": "PM_L3_CO0_BUSY", - "BriefDescription": "Lifetime, sample of CO machine 0 valid" - }, - {, "EventCode": "0x2E040", "EventName": "PM_DPTEG_FROM_L2_MEPF", "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state. due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included" @@ -1840,11 +1780,6 @@ "BriefDescription": "L3 PF received retry port 0, every retry counted" }, {, - "EventCode": "0x260AE", - "EventName": "PM_L3_P0_PF_RTY", - "BriefDescription": "L3 PF received retry port 0, every retry counted" - }, - {, "EventCode": "0x268B2", "EventName": "PM_L3_LOC_GUESS_WRONG", "BriefDescription": "Initial scope=node (LNS) but data from out side local node (near or far or rem). Prediction too Low" @@ -1895,11 +1830,6 @@ "BriefDescription": "Lifetime, sample of snooper machine 0 valid" }, {, - "EventCode": "0x460AC", - "EventName": "PM_L3_SN0_BUSY", - "BriefDescription": "Lifetime, sample of snooper machine 0 valid" - }, - {, "EventCode": "0x3005C", "EventName": "PM_BFU_BUSY", "BriefDescription": "Cycles in which all 4 Binary Floating Point units are busy. The BFU is running at capacity" @@ -1935,11 +1865,6 @@ "BriefDescription": "Lifetime, sample of PF machine 0 valid" }, {, - "EventCode": "0x460B4", - "EventName": "PM_L3_PF0_BUSY", - "BriefDescription": "Lifetime, sample of PF machine 0 valid" - }, - {, "EventCode": "0xC0B0", "EventName": "PM_LSU_FLUSH_UE", "BriefDescription": "Correctable ECC error on reload data, reported at critical data forward time" @@ -2085,11 +2010,6 @@ "BriefDescription": "L3 CO received retry port 1 (memory only), every retry counted" }, {, - "EventCode": "0x468AE", - "EventName": "PM_L3_P1_CO_RTY", - "BriefDescription": "L3 CO received retry port 3 (memory only), every retry counted" - }, - {, "EventCode": "0xC0AC", "EventName": "PM_LSU_FLUSH_EMSH", "BriefDescription": "An ERAT miss was detected after a set-p hit. Erat tracker indicates fail due to tlbmiss and the instruction gets flushed because the instruction was working on the wrong address" @@ -2195,11 +2115,6 @@ "BriefDescription": "SNP dispatched for a write and was M (true M); for DMA cacheinj this will pulse if rty/push is required (won't pulse if cacheinj is accepted)" }, {, - "EventCode": "0x46886", - "EventName": "PM_L2_SN_M_WR_DONE", - "BriefDescription": "SNP dispatched for a write and was M (true M); for DMA cacheinj this will pulse if rty/push is required (won't pulse if cacheinj is accepted)" - }, - {, "EventCode": "0x489C", "EventName": "PM_BR_CORECT_PRED_TAKEN_CMPL", "BriefDescription": "Conditional Branch Completed in which the HW correctly predicted the direction as taken. Counted at completion time" @@ -2290,21 +2205,11 @@ "BriefDescription": "SN mach 0 Busy. Used by PMU to sample ave SN lifetime (mach0 used as sample point)" }, {, - "EventCode": "0x26090", - "EventName": "PM_SN0_BUSY", - "BriefDescription": "SN mach 0 Busy. Used by PMU to sample ave SN lifetime (mach0 used as sample point)" - }, - {, "EventCode": "0x360AE", "EventName": "PM_L3_P0_CO_RTY", "BriefDescription": "L3 CO received retry port 0 (memory only), every retry counted" }, {, - "EventCode": "0x460AE", - "EventName": "PM_L3_P0_CO_RTY", - "BriefDescription": "L3 CO received retry port 0 (memory only), every retry counted" - }, - {, "EventCode": "0x168A8", "EventName": "PM_L3_WI_USAGE", "BriefDescription": "Lifetime, sample of Write Inject machine 0 valid" @@ -2340,26 +2245,11 @@ "BriefDescription": "L3 PF received retry port 1, every retry counted" }, {, - "EventCode": "0x268AE", - "EventName": "PM_L3_P1_PF_RTY", - "BriefDescription": "L3 PF received retry port 3, every retry counted" - }, - {, "EventCode": "0x46082", "EventName": "PM_L2_ST_DISP", "BriefDescription": "All successful D-side store dispatches for this thread " }, {, - "EventCode": "0x1689E", - "EventName": "PM_L2_ST_DISP", - "BriefDescription": "All successful D-side store dispatches for this thread (L2 miss + L2 hits)" - }, - {, - "EventCode": "0x36880", - "EventName": "PM_L2_INST_MISS", - "BriefDescription": "All successful I-side dispatches that were an L2 miss for this thread (excludes i_l2mru_tch reqs)" - }, - {, "EventCode": "0x4609E", "EventName": "PM_L2_INST_MISS", "BriefDescription": "All successful I-side dispatches that were an L2 miss for this thread (excludes i_l2mru_tch reqs)" @@ -2430,11 +2320,6 @@ "BriefDescription": "# PPC Dispatched" }, {, - "EventCode": "0x300F2", - "EventName": "PM_INST_DISP", - "BriefDescription": "# PPC Dispatched" - }, - {, "EventCode": "0x4E05E", "EventName": "PM_TM_OUTER_TBEGIN_DISP", "BriefDescription": "Number of outer tbegin instructions dispatched. The dispatch unit determines whether the tbegin instruction is outer or nested. This is a speculative count, which includes flushed instructions" @@ -2460,11 +2345,6 @@ "BriefDescription": "All successful D-side store dispatches for this thread that were L2 hits" }, {, - "EventCode": "0x2689E", - "EventName": "PM_L2_ST_HIT", - "BriefDescription": "All successful D-side store dispatches that were L2 hits for this thread" - }, - {, "EventCode": "0x360A8", "EventName": "PM_L3_CO", "BriefDescription": "L3 castout occurring (does not include casthrough or log writes (cinj/dmaw))" diff --git a/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json b/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json index 47a82568a8df..bc2db636dabf 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json @@ -420,11 +420,6 @@ "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for an instruction fetch" }, {, - "EventCode": "0x10016", - "EventName": "PM_DSLB_MISS", - "BriefDescription": "Data SLB Miss - Total of all segment sizes" - }, - {, "EventCode": "0xD0A8", "EventName": "PM_DSLB_MISS", "BriefDescription": "Data SLB Miss - Total of all segment sizes" @@ -554,4 +549,4 @@ "EventName": "PM_MRK_DATA_FROM_L21_SHR_CYC", "BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's L2 on the same chip due to a marked load" } -]
\ No newline at end of file +] diff --git a/tools/perf/pmu-events/arch/powerpc/power9/pmc.json b/tools/perf/pmu-events/arch/powerpc/power9/pmc.json index a2c95a99e168..3ef8a10aac86 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/pmc.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/pmc.json @@ -5,11 +5,6 @@ "BriefDescription": "Branches that are not strongly biased" }, {, - "EventCode": "0x40036", - "EventName": "PM_BR_2PATH", - "BriefDescription": "Branches that are not strongly biased" - }, - {, "EventCode": "0x40056", "EventName": "PM_MEM_LOC_THRESH_LSU_HIGH", "BriefDescription": "Local memory above threshold for LSU medium" @@ -124,4 +119,4 @@ "EventName": "PM_1FLOP_CMPL", "BriefDescription": "one flop (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg) operation completed" } -]
\ No newline at end of file +] diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 761c5a448c56..466a462b26d1 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -237,6 +237,11 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode, thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al); if (!al.map || !al.map->dso) { + if (cpumode == PERF_RECORD_MISC_HYPERVISOR) { + pr_debug("Hypervisor address can not be resolved - skipping\n"); + return 0; + } + pr_debug("thread__find_addr_map failed\n"); return -1; } diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 2a7b9b47bbcb..9ba1d216a89f 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -6,7 +6,7 @@ #include "debug.h" #include "machine.h" #include "event.h" -#include "unwind.h" +#include "../util/unwind.h" #include "perf_regs.h" #include "map.h" #include "thread.h" diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 6d028f42b3cf..c3858487159d 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -141,6 +141,9 @@ static bool samples_same(const struct perf_sample *s1, } } + if (type & PERF_SAMPLE_PHYS_ADDR) + COMP(phys_addr); + return true; } @@ -206,6 +209,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) .mask = sample_regs, .regs = regs, }, + .phys_addr = 113, }; struct sample_read_value values[] = {{1, 5}, {9, 3}, {2, 7}, {6, 4},}; struct perf_sample sample_out; @@ -305,7 +309,7 @@ int test__sample_parsing(struct test *test __maybe_unused, int subtest __maybe_u * were added. Please actually update the test rather than just change * the condition below. */ - if (PERF_SAMPLE_MAX > PERF_SAMPLE_REGS_INTR << 1) { + if (PERF_SAMPLE_MAX > PERF_SAMPLE_PHYS_ADDR << 1) { pr_debug("sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating\n"); return -1; } diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index ba0aee576a2b..786fecaf578e 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -829,7 +829,8 @@ static int annotate_browser__run(struct annotate_browser *browser, "q/ESC/CTRL+C Exit\n\n" "ENTER Go to target\n" "ESC Exit\n" - "H Cycle thru hottest instructions\n" + "H Go to hottest instruction\n" + "TAB/shift+TAB Cycle thru hottest instructions\n" "j Toggle showing jump to target arrows\n" "J Toggle showing number of jump sources on targets\n" "n Search next string\n" diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index f4bc2462bc2c..13dfb0a0bdeb 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -931,12 +931,8 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser, browser->show_dso); if (symbol_conf.show_branchflag_count) { - if (need_percent) - callchain_list_counts__printf_value(node, chain, NULL, - buf, sizeof(buf)); - else - callchain_list_counts__printf_value(NULL, chain, NULL, - buf, sizeof(buf)); + callchain_list_counts__printf_value(chain, NULL, + buf, sizeof(buf)); if (asprintf(&alloc_str2, "%s%s", str, buf) < 0) str = "Not enough memory!"; diff --git a/tools/perf/ui/progress.c b/tools/perf/ui/progress.c index a0f24c7115c5..ae91c8148edf 100644 --- a/tools/perf/ui/progress.c +++ b/tools/perf/ui/progress.c @@ -1,3 +1,4 @@ +#include <linux/kernel.h> #include "../cache.h" #include "progress.h" @@ -14,10 +15,14 @@ struct ui_progress_ops *ui_progress__ops = &null_progress__ops; void ui_progress__update(struct ui_progress *p, u64 adv) { + u64 last = p->curr; + p->curr += adv; if (p->curr >= p->next) { - p->next += p->step; + u64 nr = DIV_ROUND_UP(p->curr - last, p->step); + + p->next += nr * p->step; ui_progress__ops->update(p); } } @@ -25,7 +30,7 @@ void ui_progress__update(struct ui_progress *p, u64 adv) void ui_progress__init(struct ui_progress *p, u64 total, const char *title) { p->curr = 0; - p->next = p->step = total / 16; + p->next = p->step = total / 16 ?: 1; p->total = total; p->title = title; diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 5c95b8301c67..8bdb7a500181 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -124,12 +124,8 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_node *node, str = callchain_list__sym_name(chain, bf, sizeof(bf), false); if (symbol_conf.show_branchflag_count) { - if (!period) - callchain_list_counts__printf_value(node, chain, NULL, - buf, sizeof(buf)); - else - callchain_list_counts__printf_value(NULL, chain, NULL, - buf, sizeof(buf)); + callchain_list_counts__printf_value(chain, NULL, + buf, sizeof(buf)); if (asprintf(&alloc_str, "%s%s", str, buf) < 0) str = "Not enough memory!"; @@ -313,7 +309,7 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root, if (symbol_conf.show_branchflag_count) ret += callchain_list_counts__printf_value( - NULL, chain, fp, NULL, 0); + chain, fp, NULL, 0); ret += fprintf(fp, "\n"); if (++entries_printed == callchain_param.print_limit) diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index f320b0777e0d..be09d77cade0 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -65,8 +65,6 @@ static int parse_callchain_mode(const char *value) callchain_param.mode = CHAIN_FOLDED; return 0; } - - pr_err("Invalid callchain mode: %s\n", value); return -1; } @@ -82,8 +80,6 @@ static int parse_callchain_order(const char *value) callchain_param.order_set = true; return 0; } - - pr_err("Invalid callchain order: %s\n", value); return -1; } @@ -105,8 +101,6 @@ static int parse_callchain_sort_key(const char *value) callchain_param.branch_callstack = 1; return 0; } - - pr_err("Invalid callchain sort key: %s\n", value); return -1; } @@ -124,8 +118,6 @@ static int parse_callchain_value(const char *value) callchain_param.value = CCVAL_COUNT; return 0; } - - pr_err("Invalid callchain config key: %s\n", value); return -1; } @@ -319,12 +311,27 @@ int perf_callchain_config(const char *var, const char *value) return ret; } - if (!strcmp(var, "print-type")) - return parse_callchain_mode(value); - if (!strcmp(var, "order")) - return parse_callchain_order(value); - if (!strcmp(var, "sort-key")) - return parse_callchain_sort_key(value); + if (!strcmp(var, "print-type")){ + int ret; + ret = parse_callchain_mode(value); + if (ret == -1) + pr_err("Invalid callchain mode: %s\n", value); + return ret; + } + if (!strcmp(var, "order")){ + int ret; + ret = parse_callchain_order(value); + if (ret == -1) + pr_err("Invalid callchain order: %s\n", value); + return ret; + } + if (!strcmp(var, "sort-key")){ + int ret; + ret = parse_callchain_sort_key(value); + if (ret == -1) + pr_err("Invalid callchain sort key: %s\n", value); + return ret; + } if (!strcmp(var, "threshold")) { callchain_param.min_percent = strtod(value, &endptr); if (value == endptr) { @@ -588,7 +595,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor) call->cycles_count = cursor_node->branch_flags.cycles; call->iter_count = cursor_node->nr_loop_iter; - call->samples_count = cursor_node->samples; + call->iter_cycles = cursor_node->iter_cycles; } } @@ -722,7 +729,7 @@ static enum match_result match_chain(struct callchain_cursor_node *node, cnode->cycles_count += node->branch_flags.cycles; cnode->iter_count += node->nr_loop_iter; - cnode->samples_count += node->samples; + cnode->iter_cycles += node->iter_cycles; } } @@ -998,7 +1005,7 @@ int callchain_merge(struct callchain_cursor *cursor, int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip, struct map *map, struct symbol *sym, bool branch, struct branch_flags *flags, - int nr_loop_iter, int samples, u64 branch_from) + int nr_loop_iter, u64 iter_cycles, u64 branch_from) { struct callchain_cursor_node *node = *cursor->last; @@ -1016,7 +1023,7 @@ int callchain_cursor_append(struct callchain_cursor *cursor, node->sym = sym; node->branch = branch; node->nr_loop_iter = nr_loop_iter; - node->samples = samples; + node->iter_cycles = iter_cycles; if (flags) memcpy(&node->branch_flags, flags, @@ -1306,7 +1313,7 @@ static int branch_to_str(char *bf, int bfsize, static int branch_from_str(char *bf, int bfsize, u64 branch_count, u64 cycles_count, u64 iter_count, - u64 samples_count) + u64 iter_cycles) { int printed = 0, i = 0; u64 cycles; @@ -1318,9 +1325,13 @@ static int branch_from_str(char *bf, int bfsize, bf + printed, bfsize - printed); } - if (iter_count && samples_count) { - printed += count_pri64_printf(i++, "iterations", - iter_count / samples_count, + if (iter_count) { + printed += count_pri64_printf(i++, "iter", + iter_count, + bf + printed, bfsize - printed); + + printed += count_pri64_printf(i++, "avg_cycles", + iter_cycles / iter_count, bf + printed, bfsize - printed); } @@ -1333,7 +1344,7 @@ static int branch_from_str(char *bf, int bfsize, static int counts_str_build(char *bf, int bfsize, u64 branch_count, u64 predicted_count, u64 abort_count, u64 cycles_count, - u64 iter_count, u64 samples_count, + u64 iter_count, u64 iter_cycles, struct branch_type_stat *brtype_stat) { int printed; @@ -1346,7 +1357,7 @@ static int counts_str_build(char *bf, int bfsize, predicted_count, abort_count, brtype_stat); } else { printed = branch_from_str(bf, bfsize, branch_count, - cycles_count, iter_count, samples_count); + cycles_count, iter_count, iter_cycles); } if (!printed) @@ -1358,14 +1369,14 @@ static int counts_str_build(char *bf, int bfsize, static int callchain_counts_printf(FILE *fp, char *bf, int bfsize, u64 branch_count, u64 predicted_count, u64 abort_count, u64 cycles_count, - u64 iter_count, u64 samples_count, + u64 iter_count, u64 iter_cycles, struct branch_type_stat *brtype_stat) { char str[256]; counts_str_build(str, sizeof(str), branch_count, predicted_count, abort_count, cycles_count, - iter_count, samples_count, brtype_stat); + iter_count, iter_cycles, brtype_stat); if (fp) return fprintf(fp, "%s", str); @@ -1373,31 +1384,23 @@ static int callchain_counts_printf(FILE *fp, char *bf, int bfsize, return scnprintf(bf, bfsize, "%s", str); } -int callchain_list_counts__printf_value(struct callchain_node *node, - struct callchain_list *clist, +int callchain_list_counts__printf_value(struct callchain_list *clist, FILE *fp, char *bf, int bfsize) { u64 branch_count, predicted_count; u64 abort_count, cycles_count; - u64 iter_count = 0, samples_count = 0; + u64 iter_count, iter_cycles; branch_count = clist->branch_count; predicted_count = clist->predicted_count; abort_count = clist->abort_count; cycles_count = clist->cycles_count; - - if (node) { - struct callchain_list *call; - - list_for_each_entry(call, &node->val, list) { - iter_count += call->iter_count; - samples_count += call->samples_count; - } - } + iter_count = clist->iter_count; + iter_cycles = clist->iter_cycles; return callchain_counts_printf(fp, bf, bfsize, branch_count, predicted_count, abort_count, - cycles_count, iter_count, samples_count, + cycles_count, iter_count, iter_cycles, &clist->brtype_stat); } @@ -1523,7 +1526,8 @@ int callchain_cursor__copy(struct callchain_cursor *dst, rc = callchain_cursor_append(dst, node->ip, node->map, node->sym, node->branch, &node->branch_flags, - node->nr_loop_iter, node->samples, + node->nr_loop_iter, + node->iter_cycles, node->branch_from); if (rc) break; diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 97738201464a..1ed6fc61d0a5 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -119,7 +119,7 @@ struct callchain_list { u64 abort_count; u64 cycles_count; u64 iter_count; - u64 samples_count; + u64 iter_cycles; struct branch_type_stat brtype_stat; char *srcline; struct list_head list; @@ -139,7 +139,7 @@ struct callchain_cursor_node { struct branch_flags branch_flags; u64 branch_from; int nr_loop_iter; - int samples; + u64 iter_cycles; struct callchain_cursor_node *next; }; @@ -201,7 +201,7 @@ static inline void callchain_cursor_reset(struct callchain_cursor *cursor) int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip, struct map *map, struct symbol *sym, bool branch, struct branch_flags *flags, - int nr_loop_iter, int samples, u64 branch_from); + int nr_loop_iter, u64 iter_cycles, u64 branch_from); /* Close a cursor writing session. Initialize for the reader */ static inline void callchain_cursor_commit(struct callchain_cursor *cursor) @@ -282,8 +282,7 @@ char *callchain_node__scnprintf_value(struct callchain_node *node, int callchain_node__fprintf_value(struct callchain_node *node, FILE *fp, u64 total); -int callchain_list_counts__printf_value(struct callchain_node *node, - struct callchain_list *clist, +int callchain_list_counts__printf_value(struct callchain_list *clist, FILE *fp, char *bf, int bfsize); void free_callchain(struct callchain_root *root); diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index e84bbc8ec058..263f5a906ba5 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -10,6 +10,16 @@ #include "util.h" #include "debug.h" +#ifndef O_CLOEXEC +#ifdef __sparc__ +#define O_CLOEXEC 0x400000 +#elif defined(__alpha__) || defined(__hppa__) +#define O_CLOEXEC 010000000 +#else +#define O_CLOEXEC 02000000 +#endif +#endif + static bool check_pipe(struct perf_data_file *file) { struct stat st; @@ -96,7 +106,8 @@ static int open_file_write(struct perf_data_file *file) if (check_backup(file)) return -1; - fd = open(file->path, O_CREAT|O_RDWR|O_TRUNC, S_IRUSR|S_IWUSR); + fd = open(file->path, O_CREAT|O_RDWR|O_TRUNC|O_CLOEXEC, + S_IRUSR|S_IWUSR); if (fd < 0) pr_err("failed to open %s : %s\n", file->path, diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 423ac82605f3..ee7bcc898d35 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -200,6 +200,7 @@ struct perf_sample { u32 cpu; u32 raw_size; u64 data_src; + u64 phys_addr; u32 flags; u16 insn_len; u8 cpumode; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index d9bd632ed7db..0dccdb89572c 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -271,12 +271,17 @@ struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx) return evsel; } +static bool perf_event_can_profile_kernel(void) +{ + return geteuid() == 0 || perf_event_paranoid() == -1; +} + struct perf_evsel *perf_evsel__new_cycles(bool precise) { struct perf_event_attr attr = { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, - .exclude_kernel = geteuid() != 0, + .exclude_kernel = !perf_event_can_profile_kernel(), }; struct perf_evsel *evsel; @@ -955,6 +960,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, if (opts->sample_address) perf_evsel__set_sample_bit(evsel, DATA_SRC); + if (opts->sample_phys_addr) + perf_evsel__set_sample_bit(evsel, PHYS_ADDR); + if (opts->no_buffering) { attr->watermark = 0; attr->wakeup_events = 1; @@ -1464,7 +1472,7 @@ static void __p_sample_type(char *buf, size_t size, u64 value) bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW), bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER), bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC), - bit_name(WEIGHT), + bit_name(WEIGHT), bit_name(PHYS_ADDR), { .name = NULL, } }; #undef bit_name @@ -2206,6 +2214,12 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, } } + data->phys_addr = 0; + if (type & PERF_SAMPLE_PHYS_ADDR) { + data->phys_addr = *array; + array++; + } + return 0; } @@ -2311,6 +2325,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, } } + if (type & PERF_SAMPLE_PHYS_ADDR) + result += sizeof(u64); + return result; } @@ -2500,6 +2517,11 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, } } + if (type & PERF_SAMPLE_PHYS_ADDR) { + *array = sample->phys_addr; + array++; + } + return 0; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 351d3b2d8887..dd2c4b5112a5 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -131,6 +131,7 @@ struct perf_evsel { bool cmdline_group_boundary; struct list_head config_terms; int bpf_fd; + bool auto_merge_stats; bool merged_stat; const char * metric_expr; const char * metric_name; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 9453b2e27015..e60d8d8ea4c2 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -167,6 +167,10 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) symlen = unresolved_col_width + 4 + 2; hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO); } + + hists__new_col_len(hists, HISTC_MEM_PHYS_DADDR, + unresolved_col_width + 4 + 2); + } else { symlen = unresolved_col_width + 4 + 2; hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, symlen); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index ee3670a388df..e60dda26a920 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -47,6 +47,7 @@ enum hist_column { HISTC_GLOBAL_WEIGHT, HISTC_MEM_DADDR_SYMBOL, HISTC_MEM_DADDR_DSO, + HISTC_MEM_PHYS_DADDR, HISTC_MEM_LOCKED, HISTC_MEM_TLB, HISTC_MEM_LVL, diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 5c8eacaca4f4..df709363ef69 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1635,10 +1635,12 @@ static void ip__resolve_ams(struct thread *thread, ams->al_addr = al.addr; ams->sym = al.sym; ams->map = al.map; + ams->phys_addr = 0; } static void ip__resolve_data(struct thread *thread, - u8 m, struct addr_map_symbol *ams, u64 addr) + u8 m, struct addr_map_symbol *ams, + u64 addr, u64 phys_addr) { struct addr_location al; @@ -1658,6 +1660,7 @@ static void ip__resolve_data(struct thread *thread, ams->al_addr = al.addr; ams->sym = al.sym; ams->map = al.map; + ams->phys_addr = phys_addr; } struct mem_info *sample__resolve_mem(struct perf_sample *sample, @@ -1669,12 +1672,18 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample, return NULL; ip__resolve_ams(al->thread, &mi->iaddr, sample->ip); - ip__resolve_data(al->thread, al->cpumode, &mi->daddr, sample->addr); + ip__resolve_data(al->thread, al->cpumode, &mi->daddr, + sample->addr, sample->phys_addr); mi->data_src.val = sample->data_src; return mi; } +struct iterations { + int nr_loop_iter; + u64 cycles; +}; + static int add_callchain_ip(struct thread *thread, struct callchain_cursor *cursor, struct symbol **parent, @@ -1683,11 +1692,12 @@ static int add_callchain_ip(struct thread *thread, u64 ip, bool branch, struct branch_flags *flags, - int nr_loop_iter, - int samples, + struct iterations *iter, u64 branch_from) { struct addr_location al; + int nr_loop_iter = 0; + u64 iter_cycles = 0; al.filtered = 0; al.sym = NULL; @@ -1737,9 +1747,15 @@ static int add_callchain_ip(struct thread *thread, if (symbol_conf.hide_unresolved && al.sym == NULL) return 0; + + if (iter) { + nr_loop_iter = iter->nr_loop_iter; + iter_cycles = iter->cycles; + } + return callchain_cursor_append(cursor, al.addr, al.map, al.sym, - branch, flags, nr_loop_iter, samples, - branch_from); + branch, flags, nr_loop_iter, + iter_cycles, branch_from); } struct branch_info *sample__resolve_bstack(struct perf_sample *sample, @@ -1760,6 +1776,18 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample, return bi; } +static void save_iterations(struct iterations *iter, + struct branch_entry *be, int nr) +{ + int i; + + iter->nr_loop_iter = nr; + iter->cycles = 0; + + for (i = 0; i < nr; i++) + iter->cycles += be[i].flags.cycles; +} + #define CHASHSZ 127 #define CHASHBITS 7 #define NO_ENTRY 0xff @@ -1767,7 +1795,8 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample, #define PERF_MAX_BRANCH_DEPTH 127 /* Remove loops. */ -static int remove_loops(struct branch_entry *l, int nr) +static int remove_loops(struct branch_entry *l, int nr, + struct iterations *iter) { int i, j, off; unsigned char chash[CHASHSZ]; @@ -1792,8 +1821,18 @@ static int remove_loops(struct branch_entry *l, int nr) break; } if (is_loop) { - memmove(l + i, l + i + off, - (nr - (i + off)) * sizeof(*l)); + j = nr - (i + off); + if (j > 0) { + save_iterations(iter + i + off, + l + i, off); + + memmove(iter + i, iter + i + off, + j * sizeof(*iter)); + + memmove(l + i, l + i + off, + j * sizeof(*l)); + } + nr -= off; } } @@ -1883,7 +1922,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread, err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip, - branch, flags, 0, 0, + branch, flags, NULL, branch_from); if (err) return (err < 0) ? err : 0; @@ -1909,7 +1948,6 @@ static int thread__resolve_callchain_sample(struct thread *thread, int i, j, err, nr_entries; int skip_idx = -1; int first_call = 0; - int nr_loop_iter; if (chain) chain_nr = chain->nr; @@ -1942,6 +1980,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, if (branch && callchain_param.branch_callstack) { int nr = min(max_stack, (int)branch->nr); struct branch_entry be[nr]; + struct iterations iter[nr]; if (branch->nr > PERF_MAX_BRANCH_DEPTH) { pr_warning("corrupted branch chain. skipping...\n"); @@ -1972,38 +2011,21 @@ static int thread__resolve_callchain_sample(struct thread *thread, be[i] = branch->entries[branch->nr - i - 1]; } - nr_loop_iter = nr; - nr = remove_loops(be, nr); - - /* - * Get the number of iterations. - * It's only approximation, but good enough in practice. - */ - if (nr_loop_iter > nr) - nr_loop_iter = nr_loop_iter - nr + 1; - else - nr_loop_iter = 0; + memset(iter, 0, sizeof(struct iterations) * nr); + nr = remove_loops(be, nr, iter); for (i = 0; i < nr; i++) { - if (i == nr - 1) - err = add_callchain_ip(thread, cursor, parent, - root_al, - NULL, be[i].to, - true, &be[i].flags, - nr_loop_iter, 1, - be[i].from); - else - err = add_callchain_ip(thread, cursor, parent, - root_al, - NULL, be[i].to, - true, &be[i].flags, - 0, 0, be[i].from); + err = add_callchain_ip(thread, cursor, parent, + root_al, + NULL, be[i].to, + true, &be[i].flags, + NULL, be[i].from); if (!err) err = add_callchain_ip(thread, cursor, parent, root_al, NULL, be[i].from, true, &be[i].flags, - 0, 0, 0); + &iter[i], 0); if (err == -EINVAL) break; if (err) @@ -2037,7 +2059,7 @@ check_calls: err = add_callchain_ip(thread, cursor, parent, root_al, &cpumode, ip, - false, NULL, 0, 0, 0); + false, NULL, NULL, 0); if (err) return (err < 0) ? err : 0; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index f44aeba51d1f..f6257fb4f08c 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -310,7 +310,7 @@ static struct perf_evsel * __add_event(struct list_head *list, int *idx, struct perf_event_attr *attr, char *name, struct cpu_map *cpus, - struct list_head *config_terms) + struct list_head *config_terms, bool auto_merge_stats) { struct perf_evsel *evsel; @@ -324,6 +324,7 @@ __add_event(struct list_head *list, int *idx, evsel->cpus = cpu_map__get(cpus); evsel->own_cpus = cpu_map__get(cpus); evsel->system_wide = !!cpus; + evsel->auto_merge_stats = auto_merge_stats; if (name) evsel->name = strdup(name); @@ -339,7 +340,7 @@ static int add_event(struct list_head *list, int *idx, struct perf_event_attr *attr, char *name, struct list_head *config_terms) { - return __add_event(list, idx, attr, name, NULL, config_terms) ? 0 : -ENOMEM; + return __add_event(list, idx, attr, name, NULL, config_terms, false) ? 0 : -ENOMEM; } static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size) @@ -1209,9 +1210,9 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, get_config_name(head_config), &config_terms); } -int parse_events_add_pmu(struct parse_events_state *parse_state, +static int __parse_events_add_pmu(struct parse_events_state *parse_state, struct list_head *list, char *name, - struct list_head *head_config) + struct list_head *head_config, bool auto_merge_stats) { struct perf_event_attr attr; struct perf_pmu_info info; @@ -1232,7 +1233,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, if (!head_config) { attr.type = pmu->type; - evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu->cpus, NULL); + evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu->cpus, NULL, auto_merge_stats); return evsel ? 0 : -ENOMEM; } @@ -1254,7 +1255,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, evsel = __add_event(list, &parse_state->idx, &attr, get_config_name(head_config), pmu->cpus, - &config_terms); + &config_terms, auto_merge_stats); if (evsel) { evsel->unit = info.unit; evsel->scale = info.scale; @@ -1267,6 +1268,13 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, return evsel ? 0 : -ENOMEM; } +int parse_events_add_pmu(struct parse_events_state *parse_state, + struct list_head *list, char *name, + struct list_head *head_config) +{ + return __parse_events_add_pmu(parse_state, list, name, head_config, false); +} + int parse_events_multi_pmu_add(struct parse_events_state *parse_state, char *str, struct list_head **listp) { @@ -1296,8 +1304,8 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, return -1; list_add_tail(&term->list, head); - if (!parse_events_add_pmu(parse_state, list, - pmu->name, head)) { + if (!__parse_events_add_pmu(parse_state, list, + pmu->name, head, true)) { pr_debug("%s -> %s/%s/\n", str, pmu->name, alias->str); ok++; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index ac863691605f..a7ebd9fe8e40 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1120,6 +1120,9 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event, if (sample_type & PERF_SAMPLE_DATA_SRC) printf(" . data_src: 0x%"PRIx64"\n", sample->data_src); + if (sample_type & PERF_SAMPLE_PHYS_ADDR) + printf(" .. phys_addr: 0x%"PRIx64"\n", sample->phys_addr); + if (sample_type & PERF_SAMPLE_TRANSACTION) printf("... transaction: %" PRIx64 "\n", sample->transaction); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 12359bd986db..eb3ab902a1c0 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -1316,6 +1316,47 @@ struct sort_entry sort_mem_dcacheline = { }; static int64_t +sort__phys_daddr_cmp(struct hist_entry *left, struct hist_entry *right) +{ + uint64_t l = 0, r = 0; + + if (left->mem_info) + l = left->mem_info->daddr.phys_addr; + if (right->mem_info) + r = right->mem_info->daddr.phys_addr; + + return (int64_t)(r - l); +} + +static int hist_entry__phys_daddr_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + uint64_t addr = 0; + size_t ret = 0; + size_t len = BITS_PER_LONG / 4; + + addr = he->mem_info->daddr.phys_addr; + + ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", he->level); + + ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx", len, addr); + + ret += repsep_snprintf(bf + ret, size - ret, "%-*s", width - ret, ""); + + if (ret > width) + bf[width] = '\0'; + + return width; +} + +struct sort_entry sort_mem_phys_daddr = { + .se_header = "Data Physical Address", + .se_cmp = sort__phys_daddr_cmp, + .se_snprintf = hist_entry__phys_daddr_snprintf, + .se_width_idx = HISTC_MEM_PHYS_DADDR, +}; + +static int64_t sort__abort_cmp(struct hist_entry *left, struct hist_entry *right) { if (!left->branch_info || !right->branch_info) @@ -1547,6 +1588,7 @@ static struct sort_dimension memory_sort_dimensions[] = { DIM(SORT_MEM_LVL, "mem", sort_mem_lvl), DIM(SORT_MEM_SNOOP, "snoop", sort_mem_snoop), DIM(SORT_MEM_DCACHELINE, "dcacheline", sort_mem_dcacheline), + DIM(SORT_MEM_PHYS_DADDR, "phys_daddr", sort_mem_phys_daddr), }; #undef DIM diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index b7c75597e18f..f36dc4980a6c 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -245,6 +245,7 @@ enum sort_type { SORT_MEM_SNOOP, SORT_MEM_DCACHELINE, SORT_MEM_IADDR_SYMBOL, + SORT_MEM_PHYS_DADDR, }; /* diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 5c39f420111e..9cf781f0d8a2 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -810,12 +810,6 @@ static u64 ref_reloc(struct kmap *kmap) void __weak arch__sym_update(struct symbol *s __maybe_unused, GElf_Sym *sym __maybe_unused) { } -void __weak arch__adjust_sym_map_offset(GElf_Sym *sym, GElf_Shdr *shdr, - struct map *map __maybe_unused) -{ - sym->st_value -= shdr->sh_addr - shdr->sh_offset; -} - int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, struct symsrc *runtime_ss, int kmodule) { @@ -996,7 +990,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, /* Adjust symbol to map to file offset */ if (adjust_kernel_syms) - arch__adjust_sym_map_offset(&sym, &shdr, map); + sym.st_value -= shdr.sh_addr - shdr.sh_offset; if (strcmp(section_name, (curr_dso->short_name + diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index d00a012cfdfb..aad99e7e179b 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -186,6 +186,7 @@ struct addr_map_symbol { struct symbol *sym; u64 addr; u64 al_addr; + u64 phys_addr; }; struct branch_info { @@ -343,9 +344,6 @@ int setup_intlist(struct intlist **list, const char *list_str, #ifdef HAVE_LIBELF_SUPPORT bool elf__needs_adjust_symbols(GElf_Ehdr ehdr); void arch__sym_update(struct symbol *s, GElf_Sym *sym); -void arch__adjust_sym_map_offset(GElf_Sym *sym, - GElf_Shdr *shdr __maybe_unused, - struct map *map __maybe_unused); #endif #define SYMBOL_A 0 diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c index bbb4c1957578..6eea7cff3d4e 100644 --- a/tools/perf/util/syscalltbl.c +++ b/tools/perf/util/syscalltbl.c @@ -15,10 +15,11 @@ #include "syscalltbl.h" #include <stdlib.h> +#include <linux/compiler.h> #ifdef HAVE_SYSCALL_TABLE -#include <linux/compiler.h> #include <string.h> +#include "string2.h" #include "util.h" #if defined(__x86_64__) @@ -105,6 +106,27 @@ int syscalltbl__id(struct syscalltbl *tbl, const char *name) return sc ? sc->id : -1; } +int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx) +{ + int i; + struct syscall *syscalls = tbl->syscalls.entries; + + for (i = *idx + 1; i < tbl->syscalls.nr_entries; ++i) { + if (strglobmatch(syscalls[i].name, syscall_glob)) { + *idx = i; + return syscalls[i].id; + } + } + + return -1; +} + +int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx) +{ + *idx = -1; + return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx); +} + #else /* HAVE_SYSCALL_TABLE */ #include <libaudit.h> @@ -131,4 +153,15 @@ int syscalltbl__id(struct syscalltbl *tbl, const char *name) { return audit_name_to_syscall(name, tbl->audit_machine); } + +int syscalltbl__strglobmatch_next(struct syscalltbl *tbl __maybe_unused, + const char *syscall_glob __maybe_unused, int *idx __maybe_unused) +{ + return -1; +} + +int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx) +{ + return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx); +} #endif /* HAVE_SYSCALL_TABLE */ diff --git a/tools/perf/util/syscalltbl.h b/tools/perf/util/syscalltbl.h index e2951510484f..e9fb8786da7c 100644 --- a/tools/perf/util/syscalltbl.h +++ b/tools/perf/util/syscalltbl.h @@ -17,4 +17,7 @@ void syscalltbl__delete(struct syscalltbl *tbl); const char *syscalltbl__name(const struct syscalltbl *tbl, int id); int syscalltbl__id(struct syscalltbl *tbl, const char *name); +int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx); +int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx); + #endif /* __PERF_SYSCALLTBL_H */ diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c index 82a2ff896a95..52a39ecf5ca1 100644 --- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c +++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c @@ -759,7 +759,7 @@ static acpi_status osl_list_bios_tables(void) /* Skip NULL entries in RSDT/XSDT */ - if (!table_address) { + if (table_address == 0) { continue; } @@ -808,7 +808,8 @@ osl_get_bios_table(char *signature, u8 number_of_tables; u8 item_size; u32 current_instance = 0; - acpi_physical_address table_address = 0; + acpi_physical_address table_address; + acpi_physical_address first_table_address = 0; u32 table_length = 0; acpi_status status = AE_OK; u32 i; @@ -820,9 +821,10 @@ osl_get_bios_table(char *signature, ACPI_COMPARE_NAME(signature, ACPI_SIG_XSDT) || ACPI_COMPARE_NAME(signature, ACPI_SIG_DSDT) || ACPI_COMPARE_NAME(signature, ACPI_SIG_FACS)) { - if (instance > 0) { - return (AE_LIMIT); - } + +find_next_instance: + + table_address = 0; /* * Get the appropriate address, either 32-bit or 64-bit. Be very @@ -830,41 +832,66 @@ osl_get_bios_table(char *signature, * Note: The 64-bit addresses have priority. */ if (ACPI_COMPARE_NAME(signature, ACPI_SIG_DSDT)) { - if ((gbl_fadt->header.length >= MIN_FADT_FOR_XDSDT) && - gbl_fadt->Xdsdt) { - table_address = - (acpi_physical_address)gbl_fadt->Xdsdt; - } else - if ((gbl_fadt->header.length >= MIN_FADT_FOR_DSDT) - && gbl_fadt->dsdt) { - table_address = - (acpi_physical_address)gbl_fadt->dsdt; + if (current_instance < 2) { + if ((gbl_fadt->header.length >= + MIN_FADT_FOR_XDSDT) && gbl_fadt->Xdsdt + && current_instance == 0) { + table_address = + (acpi_physical_address)gbl_fadt-> + Xdsdt; + } else + if ((gbl_fadt->header.length >= + MIN_FADT_FOR_DSDT) + && gbl_fadt->dsdt != + first_table_address) { + table_address = + (acpi_physical_address)gbl_fadt-> + dsdt; + } } } else if (ACPI_COMPARE_NAME(signature, ACPI_SIG_FACS)) { - if ((gbl_fadt->header.length >= MIN_FADT_FOR_XFACS) && - gbl_fadt->Xfacs) { - table_address = - (acpi_physical_address)gbl_fadt->Xfacs; - } else - if ((gbl_fadt->header.length >= MIN_FADT_FOR_FACS) - && gbl_fadt->facs) { - table_address = - (acpi_physical_address)gbl_fadt->facs; + if (current_instance < 2) { + if ((gbl_fadt->header.length >= + MIN_FADT_FOR_XFACS) && gbl_fadt->Xfacs + && current_instance == 0) { + table_address = + (acpi_physical_address)gbl_fadt-> + Xfacs; + } else + if ((gbl_fadt->header.length >= + MIN_FADT_FOR_FACS) + && gbl_fadt->facs != + first_table_address) { + table_address = + (acpi_physical_address)gbl_fadt-> + facs; + } } } else if (ACPI_COMPARE_NAME(signature, ACPI_SIG_XSDT)) { if (!gbl_revision) { return (AE_BAD_SIGNATURE); } - table_address = - (acpi_physical_address)gbl_rsdp. - xsdt_physical_address; + if (current_instance == 0) { + table_address = + (acpi_physical_address)gbl_rsdp. + xsdt_physical_address; + } } else if (ACPI_COMPARE_NAME(signature, ACPI_SIG_RSDT)) { - table_address = - (acpi_physical_address)gbl_rsdp. - rsdt_physical_address; + if (current_instance == 0) { + table_address = + (acpi_physical_address)gbl_rsdp. + rsdt_physical_address; + } } else { - table_address = (acpi_physical_address)gbl_rsdp_address; - signature = ACPI_SIG_RSDP; + if (current_instance == 0) { + table_address = + (acpi_physical_address)gbl_rsdp_address; + signature = ACPI_SIG_RSDP; + } + } + + if (table_address == 0) { + goto exit_find_table; } /* Now we can get the requested special table */ @@ -875,6 +902,18 @@ osl_get_bios_table(char *signature, } table_length = ap_get_table_length(mapped_table); + if (first_table_address == 0) { + first_table_address = table_address; + } + + /* Match table instance */ + + if (current_instance != instance) { + osl_unmap_table(mapped_table); + mapped_table = NULL; + current_instance++; + goto find_next_instance; + } } else { /* Case for a normal ACPI table */ if (osl_can_use_xsdt()) { @@ -913,7 +952,7 @@ osl_get_bios_table(char *signature, /* Skip NULL entries in RSDT/XSDT */ - if (!table_address) { + if (table_address == 0) { continue; } @@ -946,6 +985,8 @@ osl_get_bios_table(char *signature, } } +exit_find_table: + if (!mapped_table) { return (AE_LIMIT); } diff --git a/tools/power/acpi/tools/acpidump/apfiles.c b/tools/power/acpi/tools/acpidump/apfiles.c index 31b5a7f74015..d686e11936c4 100644 --- a/tools/power/acpi/tools/acpidump/apfiles.c +++ b/tools/power/acpi/tools/acpidump/apfiles.c @@ -61,7 +61,7 @@ static int ap_is_existing_file(char *pathname); static int ap_is_existing_file(char *pathname) { -#ifndef _GNU_EFI +#if !defined(_GNU_EFI) && !defined(_EDK2_EFI) struct stat stat_info; if (!stat(pathname, &stat_info)) { diff --git a/tools/power/acpi/tools/acpidump/apmain.c b/tools/power/acpi/tools/acpidump/apmain.c index dd82afa897bd..943b6b614683 100644 --- a/tools/power/acpi/tools/acpidump/apmain.c +++ b/tools/power/acpi/tools/acpidump/apmain.c @@ -300,7 +300,7 @@ static int ap_do_options(int argc, char **argv) * ******************************************************************************/ -#ifndef _GNU_EFI +#if !defined(_GNU_EFI) && !defined(_EDK2_EFI) int ACPI_SYSTEM_XFACE main(int argc, char *argv[]) #else int ACPI_SYSTEM_XFACE acpi_main(int argc, char *argv[]) diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index d6e1c02ddcfe..4c5a481a850c 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -26,7 +26,7 @@ endif ifneq ($(OUTPUT),) # check that the output directory actually exists -OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd) +OUTDIR := $(realpath $(OUTPUT)) $(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist)) endif diff --git a/tools/power/cpupower/utils/cpupower.c b/tools/power/cpupower/utils/cpupower.c index 9ea914378985..2dccf4998599 100644 --- a/tools/power/cpupower/utils/cpupower.c +++ b/tools/power/cpupower/utils/cpupower.c @@ -12,6 +12,7 @@ #include <string.h> #include <unistd.h> #include <errno.h> +#include <sched.h> #include <sys/types.h> #include <sys/stat.h> #include <sys/utsname.h> @@ -31,6 +32,7 @@ static int cmd_help(int argc, const char **argv); */ struct cpupower_cpu_info cpupower_cpu_info; int run_as_root; +int base_cpu; /* Affected cpus chosen by -c/--cpu param */ struct bitmask *cpus_chosen; @@ -174,6 +176,7 @@ int main(int argc, const char *argv[]) unsigned int i, ret; struct stat statbuf; struct utsname uts; + char pathname[32]; cpus_chosen = bitmask_alloc(sysconf(_SC_NPROCESSORS_CONF)); @@ -198,17 +201,23 @@ int main(int argc, const char *argv[]) argv[0] = cmd = "help"; } - get_cpu_info(0, &cpupower_cpu_info); + base_cpu = sched_getcpu(); + if (base_cpu < 0) { + fprintf(stderr, _("No valid cpus found.\n")); + return EXIT_FAILURE; + } + + get_cpu_info(&cpupower_cpu_info); run_as_root = !geteuid(); if (run_as_root) { ret = uname(&uts); + sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); if (!ret && !strcmp(uts.machine, "x86_64") && - stat("/dev/cpu/0/msr", &statbuf) != 0) { + stat(pathname, &statbuf) != 0) { if (system("modprobe msr") == -1) fprintf(stderr, _("MSR access not available.\n")); } } - for (i = 0; i < ARRAY_SIZE(commands); i++) { struct cmd_struct *p = commands + i; diff --git a/tools/power/cpupower/utils/helpers/cpuid.c b/tools/power/cpupower/utils/helpers/cpuid.c index 39c2c7d067bb..32d37c9be791 100644 --- a/tools/power/cpupower/utils/helpers/cpuid.c +++ b/tools/power/cpupower/utils/helpers/cpuid.c @@ -42,7 +42,7 @@ cpuid_func(edx); * * TBD: Should there be a cpuid alternative for this if /proc is not mounted? */ -int get_cpu_info(unsigned int cpu, struct cpupower_cpu_info *cpu_info) +int get_cpu_info(struct cpupower_cpu_info *cpu_info) { FILE *fp; char value[64]; @@ -70,7 +70,7 @@ int get_cpu_info(unsigned int cpu, struct cpupower_cpu_info *cpu_info) if (!strncmp(value, "processor\t: ", 12)) sscanf(value, "processor\t: %u", &proc); - if (proc != cpu) + if (proc != (unsigned int)base_cpu) continue; /* Get CPU vendor */ diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h index 799a18be60aa..41da392be448 100644 --- a/tools/power/cpupower/utils/helpers/helpers.h +++ b/tools/power/cpupower/utils/helpers/helpers.h @@ -34,6 +34,7 @@ /* Internationalization ****************************/ extern int run_as_root; +extern int base_cpu; extern struct bitmask *cpus_chosen; /* Global verbose (-d) stuff *********************************/ @@ -87,11 +88,11 @@ struct cpupower_cpu_info { * * Extract CPU vendor, family, model, stepping info from /proc/cpuinfo * - * Returns 0 on success or a negativ error code + * Returns 0 on success or a negative error code * Only used on x86, below global's struct values are zero/unknown on * other archs */ -extern int get_cpu_info(unsigned int cpu, struct cpupower_cpu_info *cpu_info); +extern int get_cpu_info(struct cpupower_cpu_info *cpu_info); extern struct cpupower_cpu_info cpupower_cpu_info; /* cpuid and cpuinfo helpers **************************/ diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c index 601d719d4e60..a5e7ddf19dbd 100644 --- a/tools/power/cpupower/utils/helpers/misc.c +++ b/tools/power/cpupower/utils/helpers/misc.c @@ -13,7 +13,7 @@ int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active, *support = *active = *states = 0; - ret = get_cpu_info(0, &cpu_info); + ret = get_cpu_info(&cpu_info); if (ret) return ret; diff --git a/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c index ebeaba6571a3..f794d6bbb7e9 100644 --- a/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c +++ b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c @@ -123,7 +123,7 @@ static int hsw_ext_start(void) previous_count[num][cpu] = val; } } - hsw_ext_get_count(TSC, &tsc_at_measure_start, 0); + hsw_ext_get_count(TSC, &tsc_at_measure_start, base_cpu); return 0; } @@ -132,7 +132,7 @@ static int hsw_ext_stop(void) unsigned long long val; int num, cpu; - hsw_ext_get_count(TSC, &tsc_at_measure_end, 0); + hsw_ext_get_count(TSC, &tsc_at_measure_end, base_cpu); for (num = 0; num < HSW_EXT_CSTATE_COUNT; num++) { for (cpu = 0; cpu < cpu_count; cpu++) { diff --git a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c index c83f1606970b..d7c2a6d13dea 100644 --- a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c @@ -80,7 +80,8 @@ static int *is_valid; static int mperf_get_tsc(unsigned long long *tsc) { int ret; - ret = read_msr(0, MSR_TSC, tsc); + + ret = read_msr(base_cpu, MSR_TSC, tsc); if (ret) dprint("Reading TSC MSR failed, returning %llu\n", *tsc); return ret; diff --git a/tools/power/cpupower/utils/idle_monitor/nhm_idle.c b/tools/power/cpupower/utils/idle_monitor/nhm_idle.c index d2a91dd0d563..abf8cb5f7349 100644 --- a/tools/power/cpupower/utils/idle_monitor/nhm_idle.c +++ b/tools/power/cpupower/utils/idle_monitor/nhm_idle.c @@ -129,7 +129,7 @@ static int nhm_start(void) int num, cpu; unsigned long long dbg, val; - nhm_get_count(TSC, &tsc_at_measure_start, 0); + nhm_get_count(TSC, &tsc_at_measure_start, base_cpu); for (num = 0; num < NHM_CSTATE_COUNT; num++) { for (cpu = 0; cpu < cpu_count; cpu++) { @@ -137,7 +137,7 @@ static int nhm_start(void) previous_count[num][cpu] = val; } } - nhm_get_count(TSC, &dbg, 0); + nhm_get_count(TSC, &dbg, base_cpu); dprint("TSC diff: %llu\n", dbg - tsc_at_measure_start); return 0; } @@ -148,7 +148,7 @@ static int nhm_stop(void) unsigned long long dbg; int num, cpu; - nhm_get_count(TSC, &tsc_at_measure_end, 0); + nhm_get_count(TSC, &tsc_at_measure_end, base_cpu); for (num = 0; num < NHM_CSTATE_COUNT; num++) { for (cpu = 0; cpu < cpu_count; cpu++) { @@ -156,7 +156,7 @@ static int nhm_stop(void) current_count[num][cpu] = val; } } - nhm_get_count(TSC, &dbg, 0); + nhm_get_count(TSC, &dbg, base_cpu); dprint("TSC diff: %llu\n", dbg - tsc_at_measure_end); return 0; diff --git a/tools/power/cpupower/utils/idle_monitor/snb_idle.c b/tools/power/cpupower/utils/idle_monitor/snb_idle.c index efc8a69c9aba..a2b45219648d 100644 --- a/tools/power/cpupower/utils/idle_monitor/snb_idle.c +++ b/tools/power/cpupower/utils/idle_monitor/snb_idle.c @@ -120,7 +120,7 @@ static int snb_start(void) previous_count[num][cpu] = val; } } - snb_get_count(TSC, &tsc_at_measure_start, 0); + snb_get_count(TSC, &tsc_at_measure_start, base_cpu); return 0; } @@ -129,7 +129,7 @@ static int snb_stop(void) unsigned long long val; int num, cpu; - snb_get_count(TSC, &tsc_at_measure_end, 0); + snb_get_count(TSC, &tsc_at_measure_end, base_cpu); for (num = 0; num < SNB_CSTATE_COUNT; num++) { for (cpu = 0; cpu < cpu_count; cpu++) { diff --git a/tools/power/pm-graph/Makefile b/tools/power/pm-graph/Makefile index 4d0ccc89e6c6..32f40eacdafe 100644 --- a/tools/power/pm-graph/Makefile +++ b/tools/power/pm-graph/Makefile @@ -4,7 +4,7 @@ DESTDIR ?= all: @echo "Nothing to build" -install : +install : uninstall install -d $(DESTDIR)$(PREFIX)/lib/pm-graph install analyze_suspend.py $(DESTDIR)$(PREFIX)/lib/pm-graph install analyze_boot.py $(DESTDIR)$(PREFIX)/lib/pm-graph @@ -17,12 +17,15 @@ install : install sleepgraph.8 $(DESTDIR)$(PREFIX)/share/man/man8 uninstall : - rm $(DESTDIR)$(PREFIX)/share/man/man8/bootgraph.8 - rm $(DESTDIR)$(PREFIX)/share/man/man8/sleepgraph.8 + rm -f $(DESTDIR)$(PREFIX)/share/man/man8/bootgraph.8 + rm -f $(DESTDIR)$(PREFIX)/share/man/man8/sleepgraph.8 - rm $(DESTDIR)$(PREFIX)/bin/bootgraph - rm $(DESTDIR)$(PREFIX)/bin/sleepgraph + rm -f $(DESTDIR)$(PREFIX)/bin/bootgraph + rm -f $(DESTDIR)$(PREFIX)/bin/sleepgraph - rm $(DESTDIR)$(PREFIX)/lib/pm-graph/analyze_boot.py - rm $(DESTDIR)$(PREFIX)/lib/pm-graph/analyze_suspend.py - rmdir $(DESTDIR)$(PREFIX)/lib/pm-graph + rm -f $(DESTDIR)$(PREFIX)/lib/pm-graph/analyze_boot.py + rm -f $(DESTDIR)$(PREFIX)/lib/pm-graph/analyze_suspend.py + rm -f $(DESTDIR)$(PREFIX)/lib/pm-graph/*.pyc + if [ -d $(DESTDIR)$(PREFIX)/lib/pm-graph ] ; then \ + rmdir $(DESTDIR)$(PREFIX)/lib/pm-graph; \ + fi; diff --git a/tools/power/pm-graph/analyze_boot.py b/tools/power/pm-graph/analyze_boot.py index 3e1dcbbf1adc..e83df141a597 100755 --- a/tools/power/pm-graph/analyze_boot.py +++ b/tools/power/pm-graph/analyze_boot.py @@ -42,7 +42,7 @@ import analyze_suspend as aslib # store system values and test parameters class SystemValues(aslib.SystemValues): title = 'BootGraph' - version = 2.0 + version = '2.1' hostname = 'localhost' testtime = '' kernel = '' @@ -50,9 +50,14 @@ class SystemValues(aslib.SystemValues): ftracefile = '' htmlfile = 'bootgraph.html' outfile = '' - phoronix = False - addlogs = False + testdir = '' + testdirprefix = 'boot' + embedded = False + testlog = False + dmesglog = False + ftracelog = False useftrace = False + usecallgraph = False usedevsrc = True suspendmode = 'boot' max_graph_depth = 2 @@ -61,10 +66,12 @@ class SystemValues(aslib.SystemValues): manual = False iscronjob = False timeformat = '%.6f' + bootloader = 'grub' + blexec = [] def __init__(self): if('LOG_FILE' in os.environ and 'TEST_RESULTS_IDENTIFIER' in os.environ): - self.phoronix = True - self.addlogs = True + self.embedded = True + self.dmesglog = True self.outfile = os.environ['LOG_FILE'] self.htmlfile = os.environ['LOG_FILE'] self.hostname = platform.node() @@ -76,42 +83,80 @@ class SystemValues(aslib.SystemValues): self.kernel = self.kernelVersion(val) else: self.kernel = 'unknown' + self.testdir = datetime.now().strftime('boot-%y%m%d-%H%M%S') def kernelVersion(self, msg): return msg.split()[2] + def checkFtraceKernelVersion(self): + val = tuple(map(int, self.kernel.split('-')[0].split('.'))) + if val >= (4, 10, 0): + return True + return False def kernelParams(self): cmdline = 'initcall_debug log_buf_len=32M' if self.useftrace: - cmdline += ' trace_buf_size=128M trace_clock=global '\ + if self.cpucount > 0: + bs = min(self.memtotal / 2, 2*1024*1024) / self.cpucount + else: + bs = 131072 + cmdline += ' trace_buf_size=%dK trace_clock=global '\ 'trace_options=nooverwrite,funcgraph-abstime,funcgraph-cpu,'\ 'funcgraph-duration,funcgraph-proc,funcgraph-tail,'\ 'nofuncgraph-overhead,context-info,graph-time '\ 'ftrace=function_graph '\ 'ftrace_graph_max_depth=%d '\ 'ftrace_graph_filter=%s' % \ - (self.max_graph_depth, self.graph_filter) + (bs, self.max_graph_depth, self.graph_filter) return cmdline def setGraphFilter(self, val): - fp = open(self.tpath+'available_filter_functions') - master = fp.read().split('\n') - fp.close() + master = self.getBootFtraceFilterFunctions() + fs = '' for i in val.split(','): func = i.strip() + if func == '': + doError('badly formatted filter function string') + if '[' in func or ']' in func: + doError('loadable module functions not allowed - "%s"' % func) + if ' ' in func: + doError('spaces found in filter functions - "%s"' % func) if func not in master: doError('function "%s" not available for ftrace' % func) - self.graph_filter = val + if not fs: + fs = func + else: + fs += ','+func + if not fs: + doError('badly formatted filter function string') + self.graph_filter = fs + def getBootFtraceFilterFunctions(self): + self.rootCheck(True) + fp = open(self.tpath+'available_filter_functions') + fulllist = fp.read().split('\n') + fp.close() + list = [] + for i in fulllist: + if not i or ' ' in i or '[' in i or ']' in i: + continue + list.append(i) + return list + def myCronJob(self, line): + if '@reboot' not in line: + return False + if 'bootgraph' in line or 'analyze_boot.py' in line or '-cronjob' in line: + return True + return False def cronjobCmdString(self): cmdline = '%s -cronjob' % os.path.abspath(sys.argv[0]) args = iter(sys.argv[1:]) for arg in args: if arg in ['-h', '-v', '-cronjob', '-reboot']: continue - elif arg in ['-o', '-dmesg', '-ftrace', '-filter']: + elif arg in ['-o', '-dmesg', '-ftrace', '-func']: args.next() continue cmdline += ' '+arg if self.graph_filter != 'do_one_initcall': - cmdline += ' -filter "%s"' % self.graph_filter - cmdline += ' -o "%s"' % os.path.abspath(self.htmlfile) + cmdline += ' -func "%s"' % self.graph_filter + cmdline += ' -o "%s"' % os.path.abspath(self.testdir) return cmdline def manualRebootRequired(self): cmdline = self.kernelParams() @@ -121,6 +166,39 @@ class SystemValues(aslib.SystemValues): print '3. After reboot, re-run this tool with the same arguments but no command (w/o -reboot or -manual).\n' print 'CMDLINE="%s"' % cmdline sys.exit() + def getExec(self, cmd): + dirlist = ['/sbin', '/bin', '/usr/sbin', '/usr/bin', + '/usr/local/sbin', '/usr/local/bin'] + for path in dirlist: + cmdfull = os.path.join(path, cmd) + if os.path.exists(cmdfull): + return cmdfull + return '' + def blGrub(self): + blcmd = '' + for cmd in ['update-grub', 'grub-mkconfig', 'grub2-mkconfig']: + if blcmd: + break + blcmd = self.getExec(cmd) + if not blcmd: + doError('[GRUB] missing update command') + if not os.path.exists('/etc/default/grub'): + doError('[GRUB] missing /etc/default/grub') + if 'grub2' in blcmd: + cfg = '/boot/grub2/grub.cfg' + else: + cfg = '/boot/grub/grub.cfg' + if not os.path.exists(cfg): + doError('[GRUB] missing %s' % cfg) + if 'update-grub' in blcmd: + self.blexec = [blcmd] + else: + self.blexec = [blcmd, '-o', cfg] + def getBootLoader(self): + if self.bootloader == 'grub': + self.blGrub() + else: + doError('unknown boot loader: %s' % self.bootloader) sysvals = SystemValues() @@ -136,20 +214,23 @@ class Data(aslib.Data): idstr = '' html_device_id = 0 valid = False - initstart = 0.0 + tUserMode = 0.0 boottime = '' - phases = ['boot'] + phases = ['kernel', 'user'] do_one_initcall = False def __init__(self, num): self.testnumber = num self.idstr = 'a' self.dmesgtext = [] self.dmesg = { - 'boot': {'list': dict(), 'start': -1.0, 'end': -1.0, 'row': 0, 'color': '#dddddd'} + 'kernel': {'list': dict(), 'start': -1.0, 'end': -1.0, 'row': 0, + 'order': 0, 'color': 'linear-gradient(to bottom, #fff, #bcf)'}, + 'user': {'list': dict(), 'start': -1.0, 'end': -1.0, 'row': 0, + 'order': 1, 'color': '#fff'} } def deviceTopology(self): return '' - def newAction(self, phase, name, start, end, ret, ulen): + def newAction(self, phase, name, pid, start, end, ret, ulen): # new device callback for a specific phase self.html_device_id += 1 devid = '%s%d' % (self.idstr, self.html_device_id) @@ -163,41 +244,46 @@ class Data(aslib.Data): name = '%s[%d]' % (origname, i) i += 1 list[name] = {'name': name, 'start': start, 'end': end, - 'pid': 0, 'length': length, 'row': 0, 'id': devid, + 'pid': pid, 'length': length, 'row': 0, 'id': devid, 'ret': ret, 'ulen': ulen } return name - def deviceMatch(self, cg): + def deviceMatch(self, pid, cg): if cg.end - cg.start == 0: return True - list = self.dmesg['boot']['list'] - for devname in list: - dev = list[devname] - if cg.name == 'do_one_initcall': - if(cg.start <= dev['start'] and cg.end >= dev['end'] and dev['length'] > 0): - dev['ftrace'] = cg - self.do_one_initcall = True - return True - else: - if(cg.start > dev['start'] and cg.end < dev['end']): - if 'ftraces' not in dev: - dev['ftraces'] = [] - dev['ftraces'].append(cg) - return True + for p in data.phases: + list = self.dmesg[p]['list'] + for devname in list: + dev = list[devname] + if pid != dev['pid']: + continue + if cg.name == 'do_one_initcall': + if(cg.start <= dev['start'] and cg.end >= dev['end'] and dev['length'] > 0): + dev['ftrace'] = cg + self.do_one_initcall = True + return True + else: + if(cg.start > dev['start'] and cg.end < dev['end']): + if 'ftraces' not in dev: + dev['ftraces'] = [] + dev['ftraces'].append(cg) + return True return False # ----------------- FUNCTIONS -------------------- -# Function: loadKernelLog +# Function: parseKernelLog # Description: -# Load a raw kernel log from dmesg -def loadKernelLog(): +# parse a kernel log for boot data +def parseKernelLog(): + phase = 'kernel' data = Data(0) - data.dmesg['boot']['start'] = data.start = ktime = 0.0 + data.dmesg['kernel']['start'] = data.start = ktime = 0.0 sysvals.stamp = { 'time': datetime.now().strftime('%B %d %Y, %I:%M:%S %p'), 'host': sysvals.hostname, 'mode': 'boot', 'kernel': ''} + tp = aslib.TestProps() devtemp = dict() if(sysvals.dmesgfile): lf = open(sysvals.dmesgfile, 'r') @@ -205,6 +291,13 @@ def loadKernelLog(): lf = Popen('dmesg', stdout=PIPE).stdout for line in lf: line = line.replace('\r\n', '') + # grab the stamp and sysinfo + if re.match(tp.stampfmt, line): + tp.stamp = line + continue + elif re.match(tp.sysinfofmt, line): + tp.sysinfo = line + continue idx = line.find('[') if idx > 1: line = line[idx:] @@ -215,7 +308,6 @@ def loadKernelLog(): if(ktime > 120): break msg = m.group('msg') - data.end = data.initstart = ktime data.dmesgtext.append(line) if(ktime == 0.0 and re.match('^Linux version .*', msg)): if(not sysvals.stamp['kernel']): @@ -228,43 +320,39 @@ def loadKernelLog(): data.boottime = bt.strftime('%Y-%m-%d_%H:%M:%S') sysvals.stamp['time'] = bt.strftime('%B %d %Y, %I:%M:%S %p') continue - m = re.match('^calling *(?P<f>.*)\+.*', msg) + m = re.match('^calling *(?P<f>.*)\+.* @ (?P<p>[0-9]*)', msg) if(m): - devtemp[m.group('f')] = ktime + func = m.group('f') + pid = int(m.group('p')) + devtemp[func] = (ktime, pid) continue m = re.match('^initcall *(?P<f>.*)\+.* returned (?P<r>.*) after (?P<t>.*) usecs', msg) if(m): data.valid = True + data.end = ktime f, r, t = m.group('f', 'r', 't') if(f in devtemp): - data.newAction('boot', f, devtemp[f], ktime, int(r), int(t)) - data.end = ktime + start, pid = devtemp[f] + data.newAction(phase, f, pid, start, ktime, int(r), int(t)) del devtemp[f] continue if(re.match('^Freeing unused kernel memory.*', msg)): - break - - data.dmesg['boot']['end'] = data.end + data.tUserMode = ktime + data.dmesg['kernel']['end'] = ktime + data.dmesg['user']['start'] = ktime + phase = 'user' + + if tp.stamp: + sysvals.stamp = 0 + tp.parseStamp(data, sysvals) + data.dmesg['user']['end'] = data.end lf.close() return data -# Function: loadTraceLog +# Function: parseTraceLog # Description: # Check if trace is available and copy to a temp file -def loadTraceLog(data): - # load the data to a temp file if none given - if not sysvals.ftracefile: - lib = aslib.sysvals - aslib.rootCheck(True) - if not lib.verifyFtrace(): - doError('ftrace not available') - if lib.fgetVal('current_tracer').strip() != 'function_graph': - doError('ftrace not configured for a boot callgraph') - sysvals.ftracefile = '/tmp/boot_ftrace.%s.txt' % os.getpid() - call('cat '+lib.tpath+'trace > '+sysvals.ftracefile, shell=True) - if not sysvals.ftracefile: - doError('No trace data available') - +def parseTraceLog(data): # parse the trace log ftemp = dict() tp = aslib.TestProps() @@ -306,9 +394,29 @@ def loadTraceLog(data): print('Sanity check failed for %s-%d' % (proc, pid)) continue # match cg data to devices - if not data.deviceMatch(cg): + if not data.deviceMatch(pid, cg): print ' BAD: %s %s-%d [%f - %f]' % (cg.name, proc, pid, cg.start, cg.end) +# Function: retrieveLogs +# Description: +# Create copies of dmesg and/or ftrace for later processing +def retrieveLogs(): + # check ftrace is configured first + if sysvals.useftrace: + tracer = sysvals.fgetVal('current_tracer').strip() + if tracer != 'function_graph': + doError('ftrace not configured for a boot callgraph') + # create the folder and get dmesg + sysvals.systemInfo(aslib.dmidecode(sysvals.mempath)) + sysvals.initTestOutput('boot') + sysvals.writeDatafileHeader(sysvals.dmesgfile) + call('dmesg >> '+sysvals.dmesgfile, shell=True) + if not sysvals.useftrace: + return + # get ftrace + sysvals.writeDatafileHeader(sysvals.ftracefile) + call('cat '+sysvals.tpath+'trace >> '+sysvals.ftracefile, shell=True) + # Function: colorForName # Description: # Generate a repeatable color from a list for a given name @@ -353,18 +461,19 @@ def cgOverview(cg, minlen): # testruns: array of Data objects from parseKernelLog or parseTraceLog # Output: # True if the html file was created, false if it failed -def createBootGraph(data, embedded): +def createBootGraph(data): # html function templates html_srccall = '<div id={6} title="{5}" class="srccall" style="left:{1}%;top:{2}px;height:{3}px;width:{4}%;line-height:{3}px;">{0}</div>\n' html_timetotal = '<table class="time1">\n<tr>'\ - '<td class="blue">Time from Kernel Boot to start of User Mode: <b>{0} ms</b></td>'\ + '<td class="blue">Init process starts @ <b>{0} ms</b></td>'\ + '<td class="blue">Last initcall ends @ <b>{1} ms</b></td>'\ '</tr>\n</table>\n' # device timeline devtl = aslib.Timeline(100, 20) # write the test title and general info header - devtl.createHeader(sysvals, 'noftrace') + devtl.createHeader(sysvals) # Generate the header for this timeline t0 = data.start @@ -373,84 +482,98 @@ def createBootGraph(data, embedded): if(tTotal == 0): print('ERROR: No timeline data') return False - boot_time = '%.0f'%(tTotal*1000) - devtl.html += html_timetotal.format(boot_time) + user_mode = '%.0f'%(data.tUserMode*1000) + last_init = '%.0f'%(tTotal*1000) + devtl.html += html_timetotal.format(user_mode, last_init) # determine the maximum number of rows we need to draw - phase = 'boot' - list = data.dmesg[phase]['list'] devlist = [] - for devname in list: - d = aslib.DevItem(0, phase, list[devname]) - devlist.append(d) - devtl.getPhaseRows(devlist) + for p in data.phases: + list = data.dmesg[p]['list'] + for devname in list: + d = aslib.DevItem(0, p, list[devname]) + devlist.append(d) + devtl.getPhaseRows(devlist, 0, 'start') devtl.calcTotalRows() # draw the timeline background devtl.createZoomBox() - boot = data.dmesg[phase] - length = boot['end']-boot['start'] - left = '%.3f' % (((boot['start']-t0)*100.0)/tTotal) - width = '%.3f' % ((length*100.0)/tTotal) - devtl.html += devtl.html_tblock.format(phase, left, width, devtl.scaleH) - devtl.html += devtl.html_phase.format('0', '100', \ - '%.3f'%devtl.scaleH, '%.3f'%devtl.bodyH, \ - 'white', '') + devtl.html += devtl.html_tblock.format('boot', '0', '100', devtl.scaleH) + for p in data.phases: + phase = data.dmesg[p] + length = phase['end']-phase['start'] + left = '%.3f' % (((phase['start']-t0)*100.0)/tTotal) + width = '%.3f' % ((length*100.0)/tTotal) + devtl.html += devtl.html_phase.format(left, width, \ + '%.3f'%devtl.scaleH, '%.3f'%devtl.bodyH, \ + phase['color'], '') # draw the device timeline num = 0 devstats = dict() - for devname in sorted(list): - cls, color = colorForName(devname) - dev = list[devname] - info = '@|%.3f|%.3f|%.3f|%d' % (dev['start']*1000.0, dev['end']*1000.0, - dev['ulen']/1000.0, dev['ret']) - devstats[dev['id']] = {'info':info} - dev['color'] = color - height = devtl.phaseRowHeight(0, phase, dev['row']) - top = '%.6f' % ((dev['row']*height) + devtl.scaleH) - left = '%.6f' % (((dev['start']-t0)*100)/tTotal) - width = '%.6f' % (((dev['end']-dev['start'])*100)/tTotal) - length = ' (%0.3f ms) ' % ((dev['end']-dev['start'])*1000) - devtl.html += devtl.html_device.format(dev['id'], - devname+length+'kernel_mode', left, top, '%.3f'%height, - width, devname, ' '+cls, '') - rowtop = devtl.phaseRowTop(0, phase, dev['row']) - height = '%.6f' % (devtl.rowH / 2) - top = '%.6f' % (rowtop + devtl.scaleH + (devtl.rowH / 2)) - if data.do_one_initcall: - if('ftrace' not in dev): + for phase in data.phases: + list = data.dmesg[phase]['list'] + for devname in sorted(list): + cls, color = colorForName(devname) + dev = list[devname] + info = '@|%.3f|%.3f|%.3f|%d' % (dev['start']*1000.0, dev['end']*1000.0, + dev['ulen']/1000.0, dev['ret']) + devstats[dev['id']] = {'info':info} + dev['color'] = color + height = devtl.phaseRowHeight(0, phase, dev['row']) + top = '%.6f' % ((dev['row']*height) + devtl.scaleH) + left = '%.6f' % (((dev['start']-t0)*100)/tTotal) + width = '%.6f' % (((dev['end']-dev['start'])*100)/tTotal) + length = ' (%0.3f ms) ' % ((dev['end']-dev['start'])*1000) + devtl.html += devtl.html_device.format(dev['id'], + devname+length+phase+'_mode', left, top, '%.3f'%height, + width, devname, ' '+cls, '') + rowtop = devtl.phaseRowTop(0, phase, dev['row']) + height = '%.6f' % (devtl.rowH / 2) + top = '%.6f' % (rowtop + devtl.scaleH + (devtl.rowH / 2)) + if data.do_one_initcall: + if('ftrace' not in dev): + continue + cg = dev['ftrace'] + large, stats = cgOverview(cg, 0.001) + devstats[dev['id']]['fstat'] = stats + for l in large: + left = '%f' % (((l.time-t0)*100)/tTotal) + width = '%f' % (l.length*100/tTotal) + title = '%s (%0.3fms)' % (l.name, l.length * 1000.0) + devtl.html += html_srccall.format(l.name, left, + top, height, width, title, 'x%d'%num) + num += 1 + continue + if('ftraces' not in dev): continue - cg = dev['ftrace'] - large, stats = cgOverview(cg, 0.001) - devstats[dev['id']]['fstat'] = stats - for l in large: - left = '%f' % (((l.time-t0)*100)/tTotal) - width = '%f' % (l.length*100/tTotal) - title = '%s (%0.3fms)' % (l.name, l.length * 1000.0) - devtl.html += html_srccall.format(l.name, left, - top, height, width, title, 'x%d'%num) + for cg in dev['ftraces']: + left = '%f' % (((cg.start-t0)*100)/tTotal) + width = '%f' % ((cg.end-cg.start)*100/tTotal) + cglen = (cg.end - cg.start) * 1000.0 + title = '%s (%0.3fms)' % (cg.name, cglen) + cg.id = 'x%d' % num + devtl.html += html_srccall.format(cg.name, left, + top, height, width, title, dev['id']+cg.id) num += 1 - continue - if('ftraces' not in dev): - continue - for cg in dev['ftraces']: - left = '%f' % (((cg.start-t0)*100)/tTotal) - width = '%f' % ((cg.end-cg.start)*100/tTotal) - cglen = (cg.end - cg.start) * 1000.0 - title = '%s (%0.3fms)' % (cg.name, cglen) - cg.id = 'x%d' % num - devtl.html += html_srccall.format(cg.name, left, - top, height, width, title, dev['id']+cg.id) - num += 1 # draw the time scale, try to make the number of labels readable - devtl.createTimeScale(t0, tMax, tTotal, phase) + devtl.createTimeScale(t0, tMax, tTotal, 'boot') devtl.html += '</div>\n' # timeline is finished devtl.html += '</div>\n</div>\n' + # draw a legend which describes the phases by color + devtl.html += '<div class="legend">\n' + pdelta = 20.0 + pmargin = 36.0 + for phase in data.phases: + order = '%.2f' % ((data.dmesg[phase]['order'] * pdelta) + pmargin) + devtl.html += devtl.html_legend.format(order, \ + data.dmesg[phase]['color'], phase+'_mode', phase[0]) + devtl.html += '</div>\n' + if(sysvals.outfile == sysvals.htmlfile): hf = open(sysvals.htmlfile, 'a') else: @@ -474,7 +597,7 @@ def createBootGraph(data, embedded): .fstat td {text-align:left;width:35px;}\n\ .srccall {position:absolute;font-size:10px;z-index:7;overflow:hidden;color:black;text-align:center;white-space:nowrap;border-radius:5px;border:1px solid black;background:linear-gradient(to bottom right,#CCC,#969696);}\n\ .srccall:hover {color:white;font-weight:bold;border:1px solid white;}\n' - if(not embedded): + if(not sysvals.embedded): aslib.addCSS(hf, sysvals, 1, False, extra) # write the device timeline @@ -495,9 +618,11 @@ def createBootGraph(data, embedded): html = \ '<div id="devicedetailtitle"></div>\n'\ '<div id="devicedetail" style="display:none;">\n'\ - '<div id="devicedetail0">\n'\ - '<div id="kernel_mode" class="phaselet" style="left:0%;width:100%;background:#DDDDDD"></div>\n'\ - '</div>\n</div>\n'\ + '<div id="devicedetail0">\n' + for p in data.phases: + phase = data.dmesg[p] + html += devtl.html_phaselet.format(p+'_mode', '0', '100', phase['color']) + html += '</div>\n</div>\n'\ '<script type="text/javascript">\n'+statinfo+\ '</script>\n' hf.write(html) @@ -507,21 +632,21 @@ def createBootGraph(data, embedded): aslib.addCallgraphs(sysvals, hf, data) # add the dmesg log as a hidden div - if sysvals.addlogs: + if sysvals.dmesglog: hf.write('<div id="dmesglog" style="display:none;">\n') for line in data.dmesgtext: line = line.replace('<', '<').replace('>', '>') hf.write(line) hf.write('</div>\n') - if(not embedded): + if(not sysvals.embedded): # write the footer and close aslib.addScriptCode(hf, [data]) hf.write('</body>\n</html>\n') else: # embedded out will be loaded in a page, skip the js hf.write('<div id=bounds style=display:none>%f,%f</div>' % \ - (data.start*1000, data.initstart*1000)) + (data.start*1000, data.end*1000)) hf.close() return True @@ -533,17 +658,20 @@ def updateCron(restore=False): if not restore: sysvals.rootUser(True) crondir = '/var/spool/cron/crontabs/' - cronfile = crondir+'root' - backfile = crondir+'root-analyze_boot-backup' + if not os.path.exists(crondir): + crondir = '/var/spool/cron/' if not os.path.exists(crondir): doError('%s not found' % crondir) - out = Popen(['which', 'crontab'], stdout=PIPE).stdout.read() - if not out: + cronfile = crondir+'root' + backfile = crondir+'root-analyze_boot-backup' + cmd = sysvals.getExec('crontab') + if not cmd: doError('crontab not found') # on restore: move the backup cron back into place if restore: if os.path.exists(backfile): shutil.move(backfile, cronfile) + call([cmd, cronfile]) return # backup current cron and install new one with reboot if os.path.exists(cronfile): @@ -556,13 +684,13 @@ def updateCron(restore=False): fp = open(backfile, 'r') op = open(cronfile, 'w') for line in fp: - if '@reboot' not in line: + if not sysvals.myCronJob(line): op.write(line) continue fp.close() op.write('@reboot python %s\n' % sysvals.cronjobCmdString()) op.close() - res = call('crontab %s' % cronfile, shell=True) + res = call([cmd, cronfile]) except Exception, e: print 'Exception: %s' % str(e) shutil.move(backfile, cronfile) @@ -577,25 +705,16 @@ def updateGrub(restore=False): # call update-grub on restore if restore: try: - call(['update-grub'], stderr=PIPE, stdout=PIPE, + call(sysvals.blexec, stderr=PIPE, stdout=PIPE, env={'PATH': '.:/sbin:/usr/sbin:/usr/bin:/sbin:/bin'}) except Exception, e: print 'Exception: %s\n' % str(e) return - # verify we can do this - sysvals.rootUser(True) - grubfile = '/etc/default/grub' - if not os.path.exists(grubfile): - print 'ERROR: Unable to set the kernel parameters via grub.\n' - sysvals.manualRebootRequired() - out = Popen(['which', 'update-grub'], stdout=PIPE).stdout.read() - if not out: - print 'ERROR: Unable to set the kernel parameters via grub.\n' - sysvals.manualRebootRequired() - # extract the option and create a grub config without it + sysvals.rootUser(True) tgtopt = 'GRUB_CMDLINE_LINUX_DEFAULT' cmdline = '' + grubfile = '/etc/default/grub' tempfile = '/etc/default/grub.analyze_boot' shutil.move(grubfile, tempfile) res = -1 @@ -622,7 +741,7 @@ def updateGrub(restore=False): # if the target option value is in quotes, strip them sp = '"' val = cmdline.strip() - if val[0] == '\'' or val[0] == '"': + if val and (val[0] == '\'' or val[0] == '"'): sp = val[0] val = val.strip(sp) cmdline = val @@ -633,7 +752,7 @@ def updateGrub(restore=False): # write out the updated target option op.write('\n%s=%s%s%s\n' % (tgtopt, sp, cmdline, sp)) op.close() - res = call('update-grub') + res = call(sysvals.blexec) os.remove(grubfile) except Exception, e: print 'Exception: %s' % str(e) @@ -641,10 +760,18 @@ def updateGrub(restore=False): # cleanup shutil.move(tempfile, grubfile) if res != 0: - doError('update-grub failed') + doError('update grub failed') -# Function: doError +# Function: updateKernelParams # Description: +# update boot conf for all kernels with our parameters +def updateKernelParams(restore=False): + # find the boot loader + sysvals.getBootLoader() + if sysvals.bootloader == 'grub': + updateGrub(restore) + +# Function: doError Description: # generic error function for catastrphic failures # Arguments: # msg: the error message to print @@ -660,7 +787,7 @@ def doError(msg, help=False): # print out the help text def printHelp(): print('') - print('%s v%.1f' % (sysvals.title, sysvals.version)) + print('%s v%s' % (sysvals.title, sysvals.version)) print('Usage: bootgraph <options> <command>') print('') print('Description:') @@ -669,13 +796,19 @@ def printHelp(): print(' the start of the init process.') print('') print(' If no specific command is given the tool reads the current dmesg') - print(' and/or ftrace log and outputs bootgraph.html') + print(' and/or ftrace log and creates a timeline') + print('') + print(' Generates output files in subdirectory: boot-yymmdd-HHMMSS') + print(' HTML output: <hostname>_boot.html') + print(' raw dmesg output: <hostname>_boot_dmesg.txt') + print(' raw ftrace output: <hostname>_boot_ftrace.txt') print('') print('Options:') print(' -h Print this help text') print(' -v Print the current tool version') print(' -addlogs Add the dmesg log to the html output') - print(' -o file Html timeline name (default: bootgraph.html)') + print(' -o name Overrides the output subdirectory name when running a new test') + print(' default: boot-{date}-{time}') print(' [advanced]') print(' -f Use ftrace to add function detail (default: disabled)') print(' -callgraph Add callgraph detail, can be very large (default: disabled)') @@ -683,13 +816,18 @@ def printHelp(): print(' -mincg ms Discard all callgraphs shorter than ms milliseconds (e.g. 0.001 for us)') print(' -timeprec N Number of significant digits in timestamps (0:S, 3:ms, [6:us])') print(' -expandcg pre-expand the callgraph data in the html output (default: disabled)') - print(' -filter list Limit ftrace to comma-delimited list of functions (default: do_one_initcall)') - print(' [commands]') + print(' -func list Limit ftrace to comma-delimited list of functions (default: do_one_initcall)') + print(' -cgfilter S Filter the callgraph output in the timeline') + print(' -bl name Use the following boot loader for kernel params (default: grub)') print(' -reboot Reboot the machine automatically and generate a new timeline') - print(' -manual Show the requirements to generate a new timeline manually') - print(' -dmesg file Load a stored dmesg file (used with -ftrace)') - print(' -ftrace file Load a stored ftrace file (used with -dmesg)') + print(' -manual Show the steps to generate a new timeline manually (used with -reboot)') + print('') + print('Other commands:') print(' -flistall Print all functions capable of being captured in ftrace') + print(' -sysinfo Print out system info extracted from BIOS') + print(' [redo]') + print(' -dmesg file Create HTML output using dmesg input (used with -ftrace)') + print(' -ftrace file Create HTML output using ftrace input (used with -dmesg)') print('') return True @@ -698,14 +836,15 @@ def printHelp(): if __name__ == '__main__': # loop through the command line arguments cmd = '' - simplecmds = ['-updategrub', '-flistall'] + testrun = True + simplecmds = ['-sysinfo', '-kpupdate', '-flistall', '-checkbl'] args = iter(sys.argv[1:]) for arg in args: if(arg == '-h'): printHelp() sys.exit() elif(arg == '-v'): - print("Version %.1f" % sysvals.version) + print("Version %s" % sysvals.version) sys.exit() elif(arg in simplecmds): cmd = arg[1:] @@ -716,16 +855,32 @@ if __name__ == '__main__': sysvals.usecallgraph = True elif(arg == '-mincg'): sysvals.mincglen = aslib.getArgFloat('-mincg', args, 0.0, 10000.0) + elif(arg == '-cgfilter'): + try: + val = args.next() + except: + doError('No callgraph functions supplied', True) + sysvals.setDeviceFilter(val) + elif(arg == '-bl'): + try: + val = args.next() + except: + doError('No boot loader name supplied', True) + if val.lower() not in ['grub']: + doError('Unknown boot loader: %s' % val, True) + sysvals.bootloader = val.lower() elif(arg == '-timeprec'): sysvals.setPrecision(aslib.getArgInt('-timeprec', args, 0, 6)) elif(arg == '-maxdepth'): sysvals.max_graph_depth = aslib.getArgInt('-maxdepth', args, 0, 1000) - elif(arg == '-filter'): + elif(arg == '-func'): try: val = args.next() except: doError('No filter functions supplied', True) - aslib.rootCheck(True) + sysvals.useftrace = True + sysvals.usecallgraph = True + sysvals.rootCheck(True) sysvals.setGraphFilter(val) elif(arg == '-ftrace'): try: @@ -734,9 +889,10 @@ if __name__ == '__main__': doError('No ftrace file supplied', True) if(os.path.exists(val) == False): doError('%s does not exist' % val) + testrun = False sysvals.ftracefile = val elif(arg == '-addlogs'): - sysvals.addlogs = True + sysvals.dmesglog = True elif(arg == '-expandcg'): sysvals.cgexp = True elif(arg == '-dmesg'): @@ -748,18 +904,15 @@ if __name__ == '__main__': doError('%s does not exist' % val) if(sysvals.htmlfile == val or sysvals.outfile == val): doError('Output filename collision') + testrun = False sysvals.dmesgfile = val elif(arg == '-o'): try: val = args.next() except: - doError('No HTML filename supplied', True) - if(sysvals.dmesgfile == val or sysvals.ftracefile == val): - doError('Output filename collision') - sysvals.htmlfile = val + doError('No subdirectory name supplied', True) + sysvals.testdir = sysvals.setOutputFolder(val) elif(arg == '-reboot'): - if sysvals.iscronjob: - doError('-reboot and -cronjob are incompatible') sysvals.reboot = True elif(arg == '-manual'): sysvals.reboot = True @@ -767,58 +920,93 @@ if __name__ == '__main__': # remaining options are only for cron job use elif(arg == '-cronjob'): sysvals.iscronjob = True - if sysvals.reboot: - doError('-reboot and -cronjob are incompatible') else: doError('Invalid argument: '+arg, True) + # compatibility errors and access checks + if(sysvals.iscronjob and (sysvals.reboot or \ + sysvals.dmesgfile or sysvals.ftracefile or cmd)): + doError('-cronjob is meant for batch purposes only') + if(sysvals.reboot and (sysvals.dmesgfile or sysvals.ftracefile)): + doError('-reboot and -dmesg/-ftrace are incompatible') + if cmd or sysvals.reboot or sysvals.iscronjob or testrun: + sysvals.rootCheck(True) + if (testrun and sysvals.useftrace) or cmd == 'flistall': + if not sysvals.verifyFtrace(): + doError('Ftrace is not properly enabled') + + # run utility commands + sysvals.cpuInfo() if cmd != '': - if cmd == 'updategrub': - updateGrub() + if cmd == 'kpupdate': + updateKernelParams() elif cmd == 'flistall': - sysvals.getFtraceFilterFunctions(False) + for f in sysvals.getBootFtraceFilterFunctions(): + print f + elif cmd == 'checkbl': + sysvals.getBootLoader() + print 'Boot Loader: %s\n%s' % (sysvals.bootloader, sysvals.blexec) + elif(cmd == 'sysinfo'): + sysvals.printSystemInfo() sys.exit() - # update grub, setup a cronjob, and reboot + # reboot: update grub, setup a cronjob, and reboot if sysvals.reboot: + if (sysvals.useftrace or sysvals.usecallgraph) and \ + not sysvals.checkFtraceKernelVersion(): + doError('Ftrace functionality requires kernel v4.10 or newer') if not sysvals.manual: - updateGrub() + updateKernelParams() updateCron() call('reboot') else: sysvals.manualRebootRequired() sys.exit() - # disable the cronjob + # cronjob: remove the cronjob, grub changes, and disable ftrace if sysvals.iscronjob: updateCron(True) - updateGrub(True) + updateKernelParams(True) + try: + sysvals.fsetVal('0', 'tracing_on') + except: + pass - data = loadKernelLog() - if sysvals.useftrace: - loadTraceLog(data) - if sysvals.iscronjob: - try: - sysvals.fsetVal('0', 'tracing_on') - except: - pass + # testrun: generate copies of the logs + if testrun: + retrieveLogs() + else: + sysvals.setOutputFile() - if(sysvals.outfile and sysvals.phoronix): - fp = open(sysvals.outfile, 'w') - fp.write('pass %s initstart %.3f end %.3f boot %s\n' % - (data.valid, data.initstart*1000, data.end*1000, data.boottime)) - fp.close() - if(not data.valid): - if sysvals.dmesgfile: + # process the log data + if sysvals.dmesgfile: + data = parseKernelLog() + if(not data.valid): doError('No initcall data found in %s' % sysvals.dmesgfile) - else: - doError('No initcall data found, is initcall_debug enabled?') + if sysvals.useftrace and sysvals.ftracefile: + parseTraceLog(data) + else: + doError('dmesg file required') print(' Host: %s' % sysvals.hostname) print(' Test time: %s' % sysvals.testtime) print(' Boot time: %s' % data.boottime) print('Kernel Version: %s' % sysvals.kernel) print(' Kernel start: %.3f' % (data.start * 1000)) - print(' init start: %.3f' % (data.initstart * 1000)) + print('Usermode start: %.3f' % (data.tUserMode * 1000)) + print('Last Init Call: %.3f' % (data.end * 1000)) + + # handle embedded output logs + if(sysvals.outfile and sysvals.embedded): + fp = open(sysvals.outfile, 'w') + fp.write('pass %s initstart %.3f end %.3f boot %s\n' % + (data.valid, data.tUserMode*1000, data.end*1000, data.boottime)) + fp.close() + + createBootGraph(data) - createBootGraph(data, sysvals.phoronix) + # if running as root, change output dir owner to sudo_user + if testrun and os.path.isdir(sysvals.testdir) and \ + os.getuid() == 0 and 'SUDO_USER' in os.environ: + cmd = 'chown -R {0}:{0} {1} > /dev/null 2>&1' + call(cmd.format(os.environ['SUDO_USER'], sysvals.testdir), shell=True) diff --git a/tools/power/pm-graph/analyze_suspend.py b/tools/power/pm-graph/analyze_suspend.py index a9206e67fc1f..1b60fe203741 100755 --- a/tools/power/pm-graph/analyze_suspend.py +++ b/tools/power/pm-graph/analyze_suspend.py @@ -68,10 +68,12 @@ from subprocess import call, Popen, PIPE # store system values and test parameters class SystemValues: title = 'SleepGraph' - version = '4.6' + version = '4.7' ansi = False verbose = False - addlogs = False + testlog = True + dmesglog = False + ftracelog = False mindevlen = 0.0 mincglen = 0.0 cgphase = '' @@ -79,10 +81,11 @@ class SystemValues: max_graph_depth = 0 callloopmaxgap = 0.0001 callloopmaxlen = 0.005 + cpucount = 0 + memtotal = 204800 srgap = 0 cgexp = False - outdir = '' - testdir = '.' + testdir = '' tpath = '/sys/kernel/debug/tracing/' fpdtpath = '/sys/firmware/acpi/tables/FPDT' epath = '/sys/kernel/debug/tracing/events/power/' @@ -95,14 +98,17 @@ class SystemValues: testcommand = '' mempath = '/dev/mem' powerfile = '/sys/power/state' + mempowerfile = '/sys/power/mem_sleep' suspendmode = 'mem' + memmode = '' hostname = 'localhost' prefix = 'test' teststamp = '' + sysstamp = '' dmesgstart = 0.0 dmesgfile = '' ftracefile = '' - htmlfile = '' + htmlfile = 'output.html' embedded = False rtcwake = True rtcwaketime = 15 @@ -127,9 +133,6 @@ class SystemValues: devpropfmt = '# Device Properties: .*' tracertypefmt = '# tracer: (?P<t>.*)' firmwarefmt = '# fwsuspend (?P<s>[0-9]*) fwresume (?P<r>[0-9]*)$' - stampfmt = '# suspend-(?P<m>[0-9]{2})(?P<d>[0-9]{2})(?P<y>[0-9]{2})-'+\ - '(?P<H>[0-9]{2})(?P<M>[0-9]{2})(?P<S>[0-9]{2})'+\ - ' (?P<host>.*) (?P<mode>.*) (?P<kernel>.*)$' tracefuncs = { 'sys_sync': dict(), 'pm_prepare_console': dict(), @@ -218,7 +221,7 @@ class SystemValues: # if this is a phoronix test run, set some default options if('LOG_FILE' in os.environ and 'TEST_RESULTS_IDENTIFIER' in os.environ): self.embedded = True - self.addlogs = True + self.dmesglog = self.ftracelog = True self.htmlfile = os.environ['LOG_FILE'] self.archargs = 'args_'+platform.machine() self.hostname = platform.node() @@ -233,6 +236,13 @@ class SystemValues: self.rtcpath = rtc if (hasattr(sys.stdout, 'isatty') and sys.stdout.isatty()): self.ansi = True + self.testdir = datetime.now().strftime('suspend-%y%m%d-%H%M%S') + def rootCheck(self, fatal=True): + if(os.access(self.powerfile, os.W_OK)): + return True + if fatal: + doError('This command requires sysfs mount and root access') + return False def rootUser(self, fatal=False): if 'USER' in os.environ and os.environ['USER'] == 'root': return True @@ -249,30 +259,60 @@ class SystemValues: args['date'] = n.strftime('%y%m%d') args['time'] = n.strftime('%H%M%S') args['hostname'] = self.hostname - self.outdir = value.format(**args) + return value.format(**args) def setOutputFile(self): - if((self.htmlfile == '') and (self.dmesgfile != '')): + if self.dmesgfile != '': m = re.match('(?P<name>.*)_dmesg\.txt$', self.dmesgfile) if(m): self.htmlfile = m.group('name')+'.html' - if((self.htmlfile == '') and (self.ftracefile != '')): + if self.ftracefile != '': m = re.match('(?P<name>.*)_ftrace\.txt$', self.ftracefile) if(m): self.htmlfile = m.group('name')+'.html' - if(self.htmlfile == ''): - self.htmlfile = 'output.html' - def initTestOutput(self, subdir, testpath=''): + def systemInfo(self, info): + p = c = m = b = '' + if 'baseboard-manufacturer' in info: + m = info['baseboard-manufacturer'] + elif 'system-manufacturer' in info: + m = info['system-manufacturer'] + if 'baseboard-product-name' in info: + p = info['baseboard-product-name'] + elif 'system-product-name' in info: + p = info['system-product-name'] + if 'processor-version' in info: + c = info['processor-version'] + if 'bios-version' in info: + b = info['bios-version'] + self.sysstamp = '# sysinfo | man:%s | plat:%s | cpu:%s | bios:%s | numcpu:%d | memsz:%d' % \ + (m, p, c, b, self.cpucount, self.memtotal) + def printSystemInfo(self): + self.rootCheck(True) + out = dmidecode(self.mempath, True) + fmt = '%-24s: %s' + for name in sorted(out): + print fmt % (name, out[name]) + print fmt % ('cpucount', ('%d' % self.cpucount)) + print fmt % ('memtotal', ('%d kB' % self.memtotal)) + def cpuInfo(self): + self.cpucount = 0 + fp = open('/proc/cpuinfo', 'r') + for line in fp: + if re.match('^processor[ \t]*:[ \t]*[0-9]*', line): + self.cpucount += 1 + fp.close() + fp = open('/proc/meminfo', 'r') + for line in fp: + m = re.match('^MemTotal:[ \t]*(?P<sz>[0-9]*) *kB', line) + if m: + self.memtotal = int(m.group('sz')) + break + fp.close() + def initTestOutput(self, name): self.prefix = self.hostname v = open('/proc/version', 'r').read().strip() kver = string.split(v)[2] - n = datetime.now() - testtime = n.strftime('suspend-%m%d%y-%H%M%S') - if not testpath: - testpath = n.strftime('suspend-%y%m%d-%H%M%S') - if(subdir != "."): - self.testdir = subdir+"/"+testpath - else: - self.testdir = testpath + fmt = name+'-%m%d%y-%H%M%S' + testtime = datetime.now().strftime(fmt) self.teststamp = \ '# '+testtime+' '+self.prefix+' '+self.suspendmode+' '+kver if(self.embedded): @@ -355,7 +395,7 @@ class SystemValues: continue self.tracefuncs[i] = dict() def getFtraceFilterFunctions(self, current): - rootCheck(True) + self.rootCheck(True) if not current: call('cat '+self.tpath+'available_filter_functions', shell=True) return @@ -453,7 +493,7 @@ class SystemValues: val += '\nr:%s_ret %s $retval\n' % (name, func) return val def addKprobes(self, output=False): - if len(sysvals.kprobes) < 1: + if len(self.kprobes) < 1: return if output: print(' kprobe functions in this kernel:') @@ -525,7 +565,7 @@ class SystemValues: fp.flush() fp.close() except: - pass + return False return True def fgetVal(self, path): file = self.tpath+path @@ -566,9 +606,15 @@ class SystemValues: self.cleanupFtrace() # set the trace clock to global self.fsetVal('global', 'trace_clock') - # set trace buffer to a huge value self.fsetVal('nop', 'current_tracer') - self.fsetVal('131073', 'buffer_size_kb') + # set trace buffer to a huge value + if self.usecallgraph or self.usedevsrc: + tgtsize = min(self.memtotal / 2, 2*1024*1024) + maxbuf = '%d' % (tgtsize / max(1, self.cpucount)) + if self.cpucount < 1 or not self.fsetVal(maxbuf, 'buffer_size_kb'): + self.fsetVal('131072', 'buffer_size_kb') + else: + self.fsetVal('16384', 'buffer_size_kb') # go no further if this is just a status check if testing: return @@ -641,6 +687,15 @@ class SystemValues: if not self.ansi: return str return '\x1B[%d;40m%s\x1B[m' % (color, str) + def writeDatafileHeader(self, filename, fwdata=[]): + fp = open(filename, 'w') + fp.write(self.teststamp+'\n') + fp.write(self.sysstamp+'\n') + if(self.suspendmode == 'mem' or self.suspendmode == 'command'): + for fw in fwdata: + if(fw): + fp.write('# fwsuspend %u fwresume %u\n' % (fw[0], fw[1])) + fp.close() sysvals = SystemValues() suspendmodename = { @@ -1008,6 +1063,12 @@ class Data: else: self.trimTime(self.tSuspended, \ self.tResumed-self.tSuspended, False) + def getTimeValues(self): + sktime = (self.dmesg['suspend_machine']['end'] - \ + self.tKernSus) * 1000 + rktime = (self.dmesg['resume_complete']['end'] - \ + self.dmesg['resume_machine']['start']) * 1000 + return (sktime, rktime) def setPhase(self, phase, ktime, isbegin): if(isbegin): self.dmesg[phase]['start'] = ktime @@ -1517,7 +1578,7 @@ class FTraceCallGraph: prelinedep += 1 last = 0 lasttime = line.time - virtualfname = 'execution_misalignment' + virtualfname = 'missing_function_name' if len(self.list) > 0: last = self.list[-1] lasttime = last.time @@ -1773,24 +1834,30 @@ class Timeline: html_device = '<div id="{0}" title="{1}" class="thread{7}" style="left:{2}%;top:{3}px;height:{4}px;width:{5}%;{8}">{6}</div>\n' html_phase = '<div class="phase" style="left:{0}%;width:{1}%;top:{2}px;height:{3}px;background:{4}">{5}</div>\n' html_phaselet = '<div id="{0}" class="phaselet" style="left:{1}%;width:{2}%;background:{3}"></div>\n' + html_legend = '<div id="p{3}" class="square" style="left:{0}%;background:{1}"> {2}</div>\n' def __init__(self, rowheight, scaleheight): self.rowH = rowheight self.scaleH = scaleheight self.html = '' - def createHeader(self, sv, suppress=''): + def createHeader(self, sv): if(not sv.stamp['time']): return self.html += '<div class="version"><a href="https://01.org/suspendresume">%s v%s</a></div>' \ % (sv.title, sv.version) - if sv.logmsg and 'log' not in suppress: - self.html += '<button id="showtest" class="logbtn">log</button>' - if sv.addlogs and 'dmesg' not in suppress: - self.html += '<button id="showdmesg" class="logbtn">dmesg</button>' - if sv.addlogs and sv.ftracefile and 'ftrace' not in suppress: - self.html += '<button id="showftrace" class="logbtn">ftrace</button>' + if sv.logmsg and sv.testlog: + self.html += '<button id="showtest" class="logbtn btnfmt">log</button>' + if sv.dmesglog: + self.html += '<button id="showdmesg" class="logbtn btnfmt">dmesg</button>' + if sv.ftracelog: + self.html += '<button id="showftrace" class="logbtn btnfmt">ftrace</button>' headline_stamp = '<div class="stamp">{0} {1} {2} {3}</div>\n' self.html += headline_stamp.format(sv.stamp['host'], sv.stamp['kernel'], sv.stamp['mode'], sv.stamp['time']) + if 'man' in sv.stamp and 'plat' in sv.stamp and 'cpu' in sv.stamp: + headline_sysinfo = '<div class="stamp sysinfo">{0} {1} <i>with</i> {2}</div>\n' + self.html += headline_sysinfo.format(sv.stamp['man'], + sv.stamp['plat'], sv.stamp['cpu']) + # Function: getDeviceRows # Description: # determine how may rows the device funcs will take @@ -1839,7 +1906,7 @@ class Timeline: # devlist: the list of devices/actions in a group of contiguous phases # Output: # The total number of rows needed to display this phase of the timeline - def getPhaseRows(self, devlist, row=0): + def getPhaseRows(self, devlist, row=0, sortby='length'): # clear all rows and set them to undefined remaining = len(devlist) rowdata = dict() @@ -1852,8 +1919,12 @@ class Timeline: if tp not in myphases: myphases.append(tp) dev['row'] = -1 - # sort by length 1st, then name 2nd - sortdict[item] = (float(dev['end']) - float(dev['start']), item.dev['name']) + if sortby == 'start': + # sort by start 1st, then length 2nd + sortdict[item] = (-1*float(dev['start']), float(dev['end']) - float(dev['start'])) + else: + # sort by length 1st, then name 2nd + sortdict[item] = (float(dev['end']) - float(dev['start']), item.dev['name']) if 'src' in dev: dev['devrows'] = self.getDeviceRows(dev['src']) # sort the devlist by length so that large items graph on top @@ -1995,8 +2066,13 @@ class Timeline: # A list of values describing the properties of these test runs class TestProps: stamp = '' + sysinfo = '' S0i3 = False fwdata = [] + stampfmt = '# [a-z]*-(?P<m>[0-9]{2})(?P<d>[0-9]{2})(?P<y>[0-9]{2})-'+\ + '(?P<H>[0-9]{2})(?P<M>[0-9]{2})(?P<S>[0-9]{2})'+\ + ' (?P<host>.*) (?P<mode>.*) (?P<kernel>.*)$' + sysinfofmt = '^# sysinfo .*' ftrace_line_fmt_fg = \ '^ *(?P<time>[0-9\.]*) *\| *(?P<cpu>[0-9]*)\)'+\ ' *(?P<proc>.*)-(?P<pid>[0-9]*) *\|'+\ @@ -2019,6 +2095,36 @@ class TestProps: self.ftrace_line_fmt = self.ftrace_line_fmt_nop else: doError('Invalid tracer format: [%s]' % tracer) + def parseStamp(self, data, sv): + m = re.match(self.stampfmt, self.stamp) + data.stamp = {'time': '', 'host': '', 'mode': ''} + dt = datetime(int(m.group('y'))+2000, int(m.group('m')), + int(m.group('d')), int(m.group('H')), int(m.group('M')), + int(m.group('S'))) + data.stamp['time'] = dt.strftime('%B %d %Y, %I:%M:%S %p') + data.stamp['host'] = m.group('host') + data.stamp['mode'] = m.group('mode') + data.stamp['kernel'] = m.group('kernel') + if re.match(self.sysinfofmt, self.sysinfo): + for f in self.sysinfo.split('|'): + if '#' in f: + continue + tmp = f.strip().split(':', 1) + key = tmp[0] + val = tmp[1] + data.stamp[key] = val + sv.hostname = data.stamp['host'] + sv.suspendmode = data.stamp['mode'] + if sv.suspendmode == 'command' and sv.ftracefile != '': + modes = ['on', 'freeze', 'standby', 'mem'] + out = Popen(['grep', 'suspend_enter', sv.ftracefile], + stderr=PIPE, stdout=PIPE).stdout.read() + m = re.match('.* suspend_enter\[(?P<mode>.*)\]', out) + if m and m.group('mode') in ['1', '2', '3']: + sv.suspendmode = modes[int(m.group('mode'))] + data.stamp['mode'] = sv.suspendmode + if not sv.stamp: + sv.stamp = data.stamp # Class: TestRun # Description: @@ -2090,35 +2196,6 @@ def vprint(msg): if(sysvals.verbose): print(msg) -# Function: parseStamp -# Description: -# Pull in the stamp comment line from the data file(s), -# create the stamp, and add it to the global sysvals object -# Arguments: -# m: the valid re.match output for the stamp line -def parseStamp(line, data): - m = re.match(sysvals.stampfmt, line) - data.stamp = {'time': '', 'host': '', 'mode': ''} - dt = datetime(int(m.group('y'))+2000, int(m.group('m')), - int(m.group('d')), int(m.group('H')), int(m.group('M')), - int(m.group('S'))) - data.stamp['time'] = dt.strftime('%B %d %Y, %I:%M:%S %p') - data.stamp['host'] = m.group('host') - data.stamp['mode'] = m.group('mode') - data.stamp['kernel'] = m.group('kernel') - sysvals.hostname = data.stamp['host'] - sysvals.suspendmode = data.stamp['mode'] - if sysvals.suspendmode == 'command' and sysvals.ftracefile != '': - modes = ['on', 'freeze', 'standby', 'mem'] - out = Popen(['grep', 'suspend_enter', sysvals.ftracefile], - stderr=PIPE, stdout=PIPE).stdout.read() - m = re.match('.* suspend_enter\[(?P<mode>.*)\]', out) - if m and m.group('mode') in ['1', '2', '3']: - sysvals.suspendmode = modes[int(m.group('mode'))] - data.stamp['mode'] = sysvals.suspendmode - if not sysvals.stamp: - sysvals.stamp = data.stamp - # Function: doesTraceLogHaveTraceEvents # Description: # Quickly determine if the ftrace log has some or all of the trace events @@ -2136,11 +2213,6 @@ def doesTraceLogHaveTraceEvents(): sysvals.usekprobes = True out = Popen(['head', '-1', sysvals.ftracefile], stderr=PIPE, stdout=PIPE).stdout.read().replace('\n', '') - m = re.match(sysvals.stampfmt, out) - if m and m.group('mode') == 'command': - sysvals.usetraceeventsonly = True - sysvals.usetraceevents = True - return # figure out what level of trace events are supported sysvals.usetraceeventsonly = True sysvals.usetraceevents = False @@ -2182,11 +2254,13 @@ def appendIncompleteTraceLog(testruns): for line in tf: # remove any latent carriage returns line = line.replace('\r\n', '') - # grab the time stamp - m = re.match(sysvals.stampfmt, line) - if(m): + # grab the stamp and sysinfo + if re.match(tp.stampfmt, line): tp.stamp = line continue + elif re.match(tp.sysinfofmt, line): + tp.sysinfo = line + continue # determine the trace data type (required for further parsing) m = re.match(sysvals.tracertypefmt, line) if(m): @@ -2219,7 +2293,7 @@ def appendIncompleteTraceLog(testruns): # look for the suspend start marker if(t.startMarker()): data = testrun[testidx].data - parseStamp(tp.stamp, data) + tp.parseStamp(data, sysvals) data.setStart(t.time) continue if(not data): @@ -2389,11 +2463,13 @@ def parseTraceLog(): for line in tf: # remove any latent carriage returns line = line.replace('\r\n', '') - # stamp line: each stamp means a new test run - m = re.match(sysvals.stampfmt, line) - if(m): + # stamp and sysinfo lines + if re.match(tp.stampfmt, line): tp.stamp = line continue + elif re.match(tp.sysinfofmt, line): + tp.sysinfo = line + continue # firmware line: pull out any firmware data m = re.match(sysvals.firmwarefmt, line) if(m): @@ -2439,7 +2515,7 @@ def parseTraceLog(): testdata.append(data) testrun = TestRun(data) testruns.append(testrun) - parseStamp(tp.stamp, data) + tp.parseStamp(data, sysvals) data.setStart(t.time) data.tKernSus = t.time continue @@ -2820,10 +2896,13 @@ def loadKernelLog(justtext=False): idx = line.find('[') if idx > 1: line = line[idx:] - m = re.match(sysvals.stampfmt, line) - if(m): + # grab the stamp and sysinfo + if re.match(tp.stampfmt, line): tp.stamp = line continue + elif re.match(tp.sysinfofmt, line): + tp.sysinfo = line + continue m = re.match(sysvals.firmwarefmt, line) if(m): tp.fwdata.append((int(m.group('s')), int(m.group('r')))) @@ -2839,7 +2918,7 @@ def loadKernelLog(justtext=False): if(data): testruns.append(data) data = Data(len(testruns)) - parseStamp(tp.stamp, data) + tp.parseStamp(data, sysvals) if len(tp.fwdata) > data.testnumber: data.fwSuspend, data.fwResume = tp.fwdata[data.testnumber] if(data.fwSuspend > 0 or data.fwResume > 0): @@ -3170,6 +3249,8 @@ def addCallgraphs(sv, hf, data): continue list = data.dmesg[p]['list'] for devname in data.sortedDevices(p): + if len(sv.devicefilter) > 0 and devname not in sv.devicefilter: + continue dev = list[devname] color = 'white' if 'color' in data.dmesg[p]: @@ -3309,7 +3390,6 @@ def createHTML(testruns): html_error = '<div id="{1}" title="kernel error/warning" class="err" style="right:{0}%">ERROR→</div>\n' html_traceevent = '<div title="{0}" class="traceevent{6}" style="left:{1}%;top:{2}px;height:{3}px;width:{4}%;line-height:{3}px;{7}">{5}</div>\n' html_cpuexec = '<div class="jiffie" style="left:{0}%;top:{1}px;height:{2}px;width:{3}%;background:{4};"></div>\n' - html_legend = '<div id="p{3}" class="square" style="left:{0}%;background:{1}"> {2}</div>\n' html_timetotal = '<table class="time1">\n<tr>'\ '<td class="green" title="{3}">{2} Suspend Time: <b>{0} ms</b></td>'\ '<td class="yellow" title="{4}">{2} Resume Time: <b>{1} ms</b></td>'\ @@ -3346,10 +3426,7 @@ def createHTML(testruns): # Generate the header for this timeline for data in testruns: tTotal = data.end - data.start - sktime = (data.dmesg['suspend_machine']['end'] - \ - data.tKernSus) * 1000 - rktime = (data.dmesg['resume_complete']['end'] - \ - data.dmesg['resume_machine']['start']) * 1000 + sktime, rktime = data.getTimeValues() if(tTotal == 0): print('ERROR: No timeline data') sys.exit() @@ -3581,7 +3658,7 @@ def createHTML(testruns): id += tmp[1][0] order = '%.2f' % ((data.dmesg[phase]['order'] * pdelta) + pmargin) name = string.replace(phase, '_', ' ') - devtl.html += html_legend.format(order, \ + devtl.html += devtl.html_legend.format(order, \ data.dmesg[phase]['color'], name, id) devtl.html += '</div>\n' @@ -3628,10 +3705,10 @@ def createHTML(testruns): addCallgraphs(sysvals, hf, data) # add the test log as a hidden div - if sysvals.logmsg: + if sysvals.testlog and sysvals.logmsg: hf.write('<div id="testlog" style="display:none;">\n'+sysvals.logmsg+'</div>\n') # add the dmesg log as a hidden div - if sysvals.addlogs and sysvals.dmesgfile: + if sysvals.dmesglog and sysvals.dmesgfile: hf.write('<div id="dmesglog" style="display:none;">\n') lf = open(sysvals.dmesgfile, 'r') for line in lf: @@ -3640,7 +3717,7 @@ def createHTML(testruns): lf.close() hf.write('</div>\n') # add the ftrace log as a hidden div - if sysvals.addlogs and sysvals.ftracefile: + if sysvals.ftracelog and sysvals.ftracefile: hf.write('<div id="ftracelog" style="display:none;">\n') lf = open(sysvals.ftracefile, 'r') for line in lf: @@ -3701,6 +3778,7 @@ def addCSS(hf, sv, testcount=1, kerror=False, extra=''): <style type=\'text/css\'>\n\ body {overflow-y:scroll;}\n\ .stamp {width:100%;text-align:center;background:gray;line-height:30px;color:white;font:25px Arial;}\n\ + .stamp.sysinfo {font:10px Arial;}\n\ .callgraph {margin-top:30px;box-shadow:5px 5px 20px black;}\n\ .callgraph article * {padding-left:28px;}\n\ h1 {color:black;font:bold 30px Times;}\n\ @@ -3746,7 +3824,7 @@ def addCSS(hf, sv, testcount=1, kerror=False, extra=''): .legend {position:relative; width:100%; height:40px; text-align:center;margin-bottom:20px}\n\ .legend .square {position:absolute;cursor:pointer;top:10px; width:0px;height:20px;border:1px solid;padding-left:20px;}\n\ button {height:40px;width:200px;margin-bottom:20px;margin-top:20px;font-size:24px;}\n\ - .logbtn {position:relative;float:right;height:25px;width:50px;margin-top:3px;margin-bottom:0;font-size:10px;text-align:center;}\n\ + .btnfmt {position:relative;float:right;height:25px;width:auto;margin-top:3px;margin-bottom:0;font-size:10px;text-align:center;}\n\ .devlist {position:'+devlistpos+';width:190px;}\n\ a:link {color:white;text-decoration:none;}\n\ a:visited {color:white;}\n\ @@ -4084,8 +4162,6 @@ def addScriptCode(hf, testruns): ' win.document.write(title+"<pre>"+log.innerHTML+"</pre>");\n'\ ' win.document.close();\n'\ ' }\n'\ - ' function onClickPhase(e) {\n'\ - ' }\n'\ ' function onMouseDown(e) {\n'\ ' dragval[0] = e.clientX;\n'\ ' dragval[1] = document.getElementById("dmesgzoombox").scrollLeft;\n'\ @@ -4120,9 +4196,6 @@ def addScriptCode(hf, testruns): ' document.getElementById("zoomin").onclick = zoomTimeline;\n'\ ' document.getElementById("zoomout").onclick = zoomTimeline;\n'\ ' document.getElementById("zoomdef").onclick = zoomTimeline;\n'\ - ' var list = document.getElementsByClassName("square");\n'\ - ' for (var i = 0; i < list.length; i++)\n'\ - ' list[i].onclick = onClickPhase;\n'\ ' var list = document.getElementsByClassName("err");\n'\ ' for (var i = 0; i < list.length; i++)\n'\ ' list[i].onclick = errWindow;\n'\ @@ -4193,8 +4266,14 @@ def executeSuspend(): if sysvals.testcommand != '': call(sysvals.testcommand+' 2>&1', shell=True); else: + mode = sysvals.suspendmode + if sysvals.memmode and os.path.exists(sysvals.mempowerfile): + mode = 'mem' + pf = open(sysvals.mempowerfile, 'w') + pf.write(sysvals.memmode) + pf.close() pf = open(sysvals.powerfile, 'w') - pf.write(sysvals.suspendmode) + pf.write(mode) # execution will pause here try: pf.close() @@ -4219,24 +4298,15 @@ def executeSuspend(): pm.stop() sysvals.fsetVal('0', 'tracing_on') print('CAPTURING TRACE') - writeDatafileHeader(sysvals.ftracefile, fwdata) + sysvals.writeDatafileHeader(sysvals.ftracefile, fwdata) call('cat '+tp+'trace >> '+sysvals.ftracefile, shell=True) sysvals.fsetVal('', 'trace') devProps() # grab a copy of the dmesg output print('CAPTURING DMESG') - writeDatafileHeader(sysvals.dmesgfile, fwdata) + sysvals.writeDatafileHeader(sysvals.dmesgfile, fwdata) sysvals.getdmesg() -def writeDatafileHeader(filename, fwdata): - fp = open(filename, 'a') - fp.write(sysvals.teststamp+'\n') - if(sysvals.suspendmode == 'mem' or sysvals.suspendmode == 'command'): - for fw in fwdata: - if(fw): - fp.write('# fwsuspend %u fwresume %u\n' % (fw[0], fw[1])) - fp.close() - # Function: setUSBDevicesAuto # Description: # Set the autosuspend control parameter of all USB devices to auto @@ -4244,7 +4314,7 @@ def writeDatafileHeader(filename, fwdata): # to always-on since the kernel cant determine if the device can # properly autosuspend def setUSBDevicesAuto(): - rootCheck(True) + sysvals.rootCheck(True) for dirname, dirnames, filenames in os.walk('/sys/devices'): if(re.match('.*/usb[0-9]*.*', dirname) and 'idVendor' in filenames and 'idProduct' in filenames): @@ -4467,13 +4537,146 @@ def devProps(data=0): # Output: # A string list of the available modes def getModes(): - modes = '' + modes = [] if(os.path.exists(sysvals.powerfile)): fp = open(sysvals.powerfile, 'r') modes = string.split(fp.read()) fp.close() + if(os.path.exists(sysvals.mempowerfile)): + deep = False + fp = open(sysvals.mempowerfile, 'r') + for m in string.split(fp.read()): + memmode = m.strip('[]') + if memmode == 'deep': + deep = True + else: + modes.append('mem-%s' % memmode) + fp.close() + if 'mem' in modes and not deep: + modes.remove('mem') return modes +# Function: dmidecode +# Description: +# Read the bios tables and pull out system info +# Arguments: +# mempath: /dev/mem or custom mem path +# fatal: True to exit on error, False to return empty dict +# Output: +# A dict object with all available key/values +def dmidecode(mempath, fatal=False): + out = dict() + + # the list of values to retrieve, with hardcoded (type, idx) + info = { + 'bios-vendor': (0, 4), + 'bios-version': (0, 5), + 'bios-release-date': (0, 8), + 'system-manufacturer': (1, 4), + 'system-product-name': (1, 5), + 'system-version': (1, 6), + 'system-serial-number': (1, 7), + 'baseboard-manufacturer': (2, 4), + 'baseboard-product-name': (2, 5), + 'baseboard-version': (2, 6), + 'baseboard-serial-number': (2, 7), + 'chassis-manufacturer': (3, 4), + 'chassis-type': (3, 5), + 'chassis-version': (3, 6), + 'chassis-serial-number': (3, 7), + 'processor-manufacturer': (4, 7), + 'processor-version': (4, 16), + } + if(not os.path.exists(mempath)): + if(fatal): + doError('file does not exist: %s' % mempath) + return out + if(not os.access(mempath, os.R_OK)): + if(fatal): + doError('file is not readable: %s' % mempath) + return out + + # by default use legacy scan, but try to use EFI first + memaddr = 0xf0000 + memsize = 0x10000 + for ep in ['/sys/firmware/efi/systab', '/proc/efi/systab']: + if not os.path.exists(ep) or not os.access(ep, os.R_OK): + continue + fp = open(ep, 'r') + buf = fp.read() + fp.close() + i = buf.find('SMBIOS=') + if i >= 0: + try: + memaddr = int(buf[i+7:], 16) + memsize = 0x20 + except: + continue + + # read in the memory for scanning + fp = open(mempath, 'rb') + try: + fp.seek(memaddr) + buf = fp.read(memsize) + except: + if(fatal): + doError('DMI table is unreachable, sorry') + else: + return out + fp.close() + + # search for either an SM table or DMI table + i = base = length = num = 0 + while(i < memsize): + if buf[i:i+4] == '_SM_' and i < memsize - 16: + length = struct.unpack('H', buf[i+22:i+24])[0] + base, num = struct.unpack('IH', buf[i+24:i+30]) + break + elif buf[i:i+5] == '_DMI_': + length = struct.unpack('H', buf[i+6:i+8])[0] + base, num = struct.unpack('IH', buf[i+8:i+14]) + break + i += 16 + if base == 0 and length == 0 and num == 0: + if(fatal): + doError('Neither SMBIOS nor DMI were found') + else: + return out + + # read in the SM or DMI table + fp = open(mempath, 'rb') + try: + fp.seek(base) + buf = fp.read(length) + except: + if(fatal): + doError('DMI table is unreachable, sorry') + else: + return out + fp.close() + + # scan the table for the values we want + count = i = 0 + while(count < num and i <= len(buf) - 4): + type, size, handle = struct.unpack('BBH', buf[i:i+4]) + n = i + size + while n < len(buf) - 1: + if 0 == struct.unpack('H', buf[n:n+2])[0]: + break + n += 1 + data = buf[i+size:n+2].split('\0') + for name in info: + itype, idxadr = info[name] + if itype == type: + idx = struct.unpack('B', buf[i+idxadr])[0] + if idx > 0 and idx < len(data) - 1: + s = data[idx-1].strip() + if s and s.lower() != 'to be filled by o.e.m.': + out[name] = data[idx-1] + i = n + 2 + count += 1 + return out + # Function: getFPDT # Description: # Read the acpi bios tables and pull out FPDT, the firmware data @@ -4487,7 +4690,7 @@ def getFPDT(output): prectype[0] = 'Basic S3 Resume Performance Record' prectype[1] = 'Basic S3 Suspend Performance Record' - rootCheck(True) + sysvals.rootCheck(True) if(not os.path.exists(sysvals.fpdtpath)): if(output): doError('file does not exist: %s' % sysvals.fpdtpath) @@ -4617,7 +4820,7 @@ def statusCheck(probecheck=False): # check we have root access res = sysvals.colorText('NO (No features of this tool will work!)') - if(rootCheck(False)): + if(sysvals.rootCheck(False)): res = 'YES' print(' have root access: %s' % res) if(res != 'YES'): @@ -4716,16 +4919,6 @@ def doError(msg, help=False): print('ERROR: %s\n') % msg sys.exit() -# Function: rootCheck -# Description: -# quick check to see if we have root access -def rootCheck(fatal): - if(os.access(sysvals.powerfile, os.W_OK)): - return True - if fatal: - doError('This command requires sysfs mount and root access') - return False - # Function: getArgInt # Description: # pull out an integer argument from the command line with checks @@ -4779,6 +4972,7 @@ def processData(): if(sysvals.ftracefile and (sysvals.usecallgraph or sysvals.usetraceevents)): appendIncompleteTraceLog(testruns) createHTML(testruns) + return testruns # Function: rerunTest # Description: @@ -4790,17 +4984,20 @@ def rerunTest(): doError('recreating this html output requires a dmesg file') sysvals.setOutputFile() vprint('Output file: %s' % sysvals.htmlfile) - if(os.path.exists(sysvals.htmlfile) and not os.access(sysvals.htmlfile, os.W_OK)): - doError('missing permission to write to %s' % sysvals.htmlfile) - processData() + if os.path.exists(sysvals.htmlfile): + if not os.path.isfile(sysvals.htmlfile): + doError('a directory already exists with this name: %s' % sysvals.htmlfile) + elif not os.access(sysvals.htmlfile, os.W_OK): + doError('missing permission to write to %s' % sysvals.htmlfile) + return processData() # Function: runTest # Description: # execute a suspend/resume, gather the logs, and generate the output -def runTest(subdir, testpath=''): +def runTest(): # prepare for the test sysvals.initFtrace() - sysvals.initTestOutput(subdir, testpath) + sysvals.initTestOutput('suspend') vprint('Output files:\n\t%s\n\t%s\n\t%s' % \ (sysvals.dmesgfile, sysvals.ftracefile, sysvals.htmlfile)) @@ -4897,7 +5094,7 @@ def configFromFile(file): if(opt.lower() == 'verbose'): sysvals.verbose = checkArgBool(value) elif(opt.lower() == 'addlogs'): - sysvals.addlogs = checkArgBool(value) + sysvals.dmesglog = sysvals.ftracelog = checkArgBool(value) elif(opt.lower() == 'dev'): sysvals.usedevsrc = checkArgBool(value) elif(opt.lower() == 'proc'): @@ -4947,7 +5144,7 @@ def configFromFile(file): elif(opt.lower() == 'mincg'): sysvals.mincglen = getArgFloat('-mincg', value, 0.0, 10000.0, False) elif(opt.lower() == 'output-dir'): - sysvals.setOutputFolder(value) + sysvals.testdir = sysvals.setOutputFolder(value) if sysvals.suspendmode == 'command' and not sysvals.testcommand: doError('No command supplied for mode "command"') @@ -5030,8 +5227,6 @@ def configFromFile(file): # Description: # print out the help text def printHelp(): - modes = getModes() - print('') print('%s v%s' % (sysvals.title, sysvals.version)) print('Usage: sudo sleepgraph <options> <commands>') @@ -5048,7 +5243,7 @@ def printHelp(): print(' If no specific command is given, the default behavior is to initiate') print(' a suspend/resume and capture the dmesg/ftrace output as an html timeline.') print('') - print(' Generates output files in subdirectory: suspend-mmddyy-HHMMSS') + print(' Generates output files in subdirectory: suspend-yymmdd-HHMMSS') print(' HTML output: <hostname>_<mode>.html') print(' raw dmesg output: <hostname>_<mode>_dmesg.txt') print(' raw ftrace output: <hostname>_<mode>_ftrace.txt') @@ -5058,8 +5253,9 @@ def printHelp(): print(' -v Print the current tool version') print(' -config fn Pull arguments and config options from file fn') print(' -verbose Print extra information during execution and analysis') - print(' -m mode Mode to initiate for suspend %s (default: %s)') % (modes, sysvals.suspendmode) - print(' -o subdir Override the output subdirectory') + print(' -m mode Mode to initiate for suspend (default: %s)') % (sysvals.suspendmode) + print(' -o name Overrides the output subdirectory name when running a new test') + print(' default: suspend-{date}-{time}') print(' -rtcwake t Wakeup t seconds after suspend, set t to "off" to disable (default: 15)') print(' -addlogs Add the dmesg and ftrace logs to the html output') print(' -srgap Add a visible gap in the timeline between sus/res (default: disabled)') @@ -5084,17 +5280,20 @@ def printHelp(): print(' -cgphase P Only show callgraph data for phase P (e.g. suspend_late)') print(' -cgtest N Only show callgraph data for test N (e.g. 0 or 1 in an x2 run)') print(' -timeprec N Number of significant digits in timestamps (0:S, [3:ms], 6:us)') - print(' [commands]') - print(' -ftrace ftracefile Create HTML output using ftrace input (used with -dmesg)') - print(' -dmesg dmesgfile Create HTML output using dmesg (used with -ftrace)') - print(' -summary directory Create a summary of all test in this dir') + print('') + print('Other commands:') print(' -modes List available suspend modes') print(' -status Test to see if the system is enabled to run this tool') print(' -fpdt Print out the contents of the ACPI Firmware Performance Data Table') + print(' -sysinfo Print out system info extracted from BIOS') print(' -usbtopo Print out the current USB topology with power info') print(' -usbauto Enable autosuspend for all connected USB devices') print(' -flist Print the list of functions currently being captured in ftrace') print(' -flistall Print all functions capable of being captured in ftrace') + print(' -summary directory Create a summary of all test in this dir') + print(' [redo]') + print(' -ftrace ftracefile Create HTML output using ftrace input (used with -dmesg)') + print(' -dmesg dmesgfile Create HTML output using dmesg (used with -ftrace)') print('') return True @@ -5102,9 +5301,9 @@ def printHelp(): # exec start (skipped if script is loaded as library) if __name__ == '__main__': cmd = '' - cmdarg = '' + outdir = '' multitest = {'run': False, 'count': 0, 'delay': 0} - simplecmds = ['-modes', '-fpdt', '-flist', '-flistall', '-usbtopo', '-usbauto', '-status'] + simplecmds = ['-sysinfo', '-modes', '-fpdt', '-flist', '-flistall', '-usbtopo', '-usbauto', '-status'] # loop through the command line arguments args = iter(sys.argv[1:]) for arg in args: @@ -5135,7 +5334,7 @@ if __name__ == '__main__': elif(arg == '-f'): sysvals.usecallgraph = True elif(arg == '-addlogs'): - sysvals.addlogs = True + sysvals.dmesglog = sysvals.ftracelog = True elif(arg == '-verbose'): sysvals.verbose = True elif(arg == '-proc'): @@ -5195,7 +5394,7 @@ if __name__ == '__main__': val = args.next() except: doError('No subdirectory name supplied', True) - sysvals.setOutputFolder(val) + outdir = sysvals.setOutputFolder(val) elif(arg == '-config'): try: val = args.next() @@ -5236,7 +5435,7 @@ if __name__ == '__main__': except: doError('No directory supplied', True) cmd = 'summary' - cmdarg = val + outdir = val sysvals.notestrun = True if(os.path.isdir(val) == False): doError('%s is not accesible' % val) @@ -5260,11 +5459,14 @@ if __name__ == '__main__': sysvals.mincglen = sysvals.mindevlen # just run a utility command and exit + sysvals.cpuInfo() if(cmd != ''): if(cmd == 'status'): statusCheck(True) elif(cmd == 'fpdt'): getFPDT(True) + elif(cmd == 'sysinfo'): + sysvals.printSystemInfo() elif(cmd == 'usbtopo'): detectUSB() elif(cmd == 'modes'): @@ -5276,7 +5478,7 @@ if __name__ == '__main__': elif(cmd == 'usbauto'): setUSBDevicesAuto() elif(cmd == 'summary'): - runSummary(cmdarg, True) + runSummary(outdir, True) sys.exit() # if instructed, re-analyze existing data files @@ -5289,21 +5491,43 @@ if __name__ == '__main__': print('Check FAILED, aborting the test run!') sys.exit() + # extract mem modes and convert + mode = sysvals.suspendmode + if 'mem' == mode[:3]: + if '-' in mode: + memmode = mode.split('-')[-1] + else: + memmode = 'deep' + if memmode == 'shallow': + mode = 'standby' + elif memmode == 's2idle': + mode = 'freeze' + else: + mode = 'mem' + sysvals.memmode = memmode + sysvals.suspendmode = mode + + sysvals.systemInfo(dmidecode(sysvals.mempath)) + if multitest['run']: # run multiple tests in a separate subdirectory - s = 'x%d' % multitest['count'] - if not sysvals.outdir: - sysvals.outdir = datetime.now().strftime('suspend-'+s+'-%m%d%y-%H%M%S') - if not os.path.isdir(sysvals.outdir): - os.mkdir(sysvals.outdir) + if not outdir: + s = 'suspend-x%d' % multitest['count'] + outdir = datetime.now().strftime(s+'-%y%m%d-%H%M%S') + if not os.path.isdir(outdir): + os.mkdir(outdir) for i in range(multitest['count']): if(i != 0): print('Waiting %d seconds...' % (multitest['delay'])) time.sleep(multitest['delay']) print('TEST (%d/%d) START' % (i+1, multitest['count'])) - runTest(sysvals.outdir) + fmt = 'suspend-%y%m%d-%H%M%S' + sysvals.testdir = os.path.join(outdir, datetime.now().strftime(fmt)) + runTest() print('TEST (%d/%d) COMPLETE' % (i+1, multitest['count'])) - runSummary(sysvals.outdir, False) + runSummary(outdir, False) else: + if outdir: + sysvals.testdir = outdir # run the test in the current directory - runTest('.', sysvals.outdir) + runTest() diff --git a/tools/power/pm-graph/bootgraph.8 b/tools/power/pm-graph/bootgraph.8 index 55272a67b0e7..dbdafcf546df 100644 --- a/tools/power/pm-graph/bootgraph.8 +++ b/tools/power/pm-graph/bootgraph.8 @@ -8,14 +8,23 @@ bootgraph \- Kernel boot timing analysis .RB [ COMMAND ] .SH DESCRIPTION \fBbootgraph \fP reads the dmesg log from kernel boot and -creates an html representation of the initcall timeline up to the start -of the init process. +creates an html representation of the initcall timeline. It graphs +every module init call found, through both kernel and user modes. The +timeline is split into two phases: kernel mode & user mode. kernel mode +represents a single process run on a single cpu with serial init calls. +Once user mode begins, the init process is called, and the init calls +start working in parallel. .PP If no specific command is given, the tool reads the current dmesg log and -outputs bootgraph.html. +outputs a new timeline. .PP The tool can also augment the timeline with ftrace data on custom target functions as well as full trace callgraphs. +.PP +Generates output files in subdirectory: boot-yymmdd-HHMMSS + html timeline : <hostname>_boot.html + raw dmesg file : <hostname>_boot_dmesg.txt + raw ftrace file : <hostname>_boot_ftrace.txt .SH OPTIONS .TP \fB-h\fR @@ -28,15 +37,18 @@ Print the current tool version Add the dmesg log to the html output. It will be viewable by clicking a button in the timeline. .TP -\fB-o \fIfile\fR -Override the HTML output filename (default: bootgraph.html) -.SS "Ftrace Debug" +\fB-o \fIname\fR +Overrides the output subdirectory name when running a new test. +Use {date}, {time}, {hostname} for current values. +.sp +e.g. boot-{hostname}-{date}-{time} +.SS "advanced" .TP \fB-f\fR Use ftrace to add function detail (default: disabled) .TP \fB-callgraph\fR -Use ftrace to create initcall callgraphs (default: disabled). If -filter +Use ftrace to create initcall callgraphs (default: disabled). If -func is not used there will be one callgraph per initcall. This can produce very large outputs, i.e. 10MB - 100MB. .TP @@ -50,16 +62,19 @@ This reduces the html file size as there can be many tiny callgraphs which are barely visible in the timeline. The value is a float: e.g. 0.001 represents 1 us. .TP +\fB-cgfilter \fI"func1,func2,..."\fR +Reduce callgraph output in the timeline by limiting it to a list of calls. The +argument can be a single function name or a comma delimited list. +(default: none) +.TP \fB-timeprec \fIn\fR Number of significant digits in timestamps (0:S, 3:ms, [6:us]) .TP \fB-expandcg\fR pre-expand the callgraph data in the html output (default: disabled) .TP -\fB-filter \fI"func1,func2,..."\fR +\fB-func \fI"func1,func2,..."\fR Instead of tracing each initcall, trace a custom list of functions (default: do_one_initcall) - -.SH COMMANDS .TP \fB-reboot\fR Reboot the machine and generate a new timeline automatically. Works in 4 steps. @@ -73,16 +88,23 @@ Show the requirements to generate a new timeline manually. Requires 3 steps. 1. append the string to the kernel command line via your native boot manager. 2. reboot the system 3. after startup, re-run the tool with the same arguments and no command + +.SH COMMANDS +.SS "rebuild" .TP \fB-dmesg \fIfile\fR Create HTML output from an existing dmesg file. .TP \fB-ftrace \fIfile\fR Create HTML output from an existing ftrace file (used with -dmesg). +.SS "other" .TP \fB-flistall\fR Print all ftrace functions capable of being captured. These are all the -possible values you can add to trace via the -filter argument. +possible values you can add to trace via the -func argument. +.TP +\fB-sysinfo\fR +Print out system info extracted from BIOS. Reads /dev/mem directly instead of going through dmidecode. .SH EXAMPLES Create a timeline using the current dmesg log. @@ -93,13 +115,13 @@ Create a timeline using the current dmesg and ftrace log. .IP \f(CW$ bootgraph -callgraph\fR .PP -Create a timeline using the current dmesg, add the log to the html and change the name. +Create a timeline using the current dmesg, add the log to the html and change the folder. .IP -\f(CW$ bootgraph -addlogs -o myboot.html\fR +\f(CW$ bootgraph -addlogs -o "myboot-{date}-{time}"\fR .PP Capture a new boot timeline by automatically rebooting the machine. .IP -\f(CW$ sudo bootgraph -reboot -addlogs -o latestboot.html\fR +\f(CW$ sudo bootgraph -reboot -addlogs -o "latest-{hostname)"\fR .PP Capture a new boot timeline with function trace data. .IP @@ -111,7 +133,7 @@ Capture a new boot timeline with trace & callgraph data. Skip callgraphs smaller .PP Capture a new boot timeline with callgraph data over custom functions. .IP -\f(CW$ sudo bootgraph -reboot -callgraph -filter "acpi_ps_parse_aml,msleep"\fR +\f(CW$ sudo bootgraph -reboot -callgraph -func "acpi_ps_parse_aml,msleep"\fR .PP Capture a brand new boot timeline with manual reboot. .IP @@ -123,6 +145,15 @@ Capture a brand new boot timeline with manual reboot. .IP \f(CW$ sudo bootgraph -callgraph # re-run the tool after restart\fR .PP +.SS "rebuild timeline from logs" +.PP +Rebuild the html from a previous run's logs, using the same options. +.IP +\f(CW$ bootgraph -dmesg dmesg.txt -ftrace ftrace.txt -callgraph\fR +.PP +Rebuild the html with different options. +.IP +\f(CW$ bootgraph -dmesg dmesg.txt -ftrace ftrace.txt -addlogs\fR .SH "SEE ALSO" dmesg(1), update-grub(8), crontab(1), reboot(8) diff --git a/tools/power/pm-graph/sleepgraph.8 b/tools/power/pm-graph/sleepgraph.8 index 610e72ebbc06..fbe7bd3eae8e 100644 --- a/tools/power/pm-graph/sleepgraph.8 +++ b/tools/power/pm-graph/sleepgraph.8 @@ -39,8 +39,9 @@ Pull arguments and config options from a file. \fB-m \fImode\fR Mode to initiate for suspend e.g. standby, freeze, mem (default: mem). .TP -\fB-o \fIsubdir\fR -Override the output subdirectory. Use {date}, {time}, {hostname} for current values. +\fB-o \fIname\fR +Overrides the output subdirectory name when running a new test. +Use {date}, {time}, {hostname} for current values. .sp e.g. suspend-{hostname}-{date}-{time} .TP @@ -52,7 +53,7 @@ disable rtcwake and require a user keypress to resume. Add the dmesg and ftrace logs to the html output. They will be viewable by clicking buttons in the timeline. -.SS "Advanced" +.SS "advanced" .TP \fB-cmd \fIstr\fR Run the timeline over a custom suspend command, e.g. pm-suspend. By default @@ -91,7 +92,7 @@ Include \fIt\fR ms delay after last resume (default: 0 ms). Execute \fIn\fR consecutive tests at \fId\fR seconds intervals. The outputs will be created in a new subdirectory with a summary page: suspend-xN-{date}-{time}. -.SS "Ftrace Debug" +.SS "ftrace debug" .TP \fB-f\fR Use ftrace to create device callgraphs (default: disabled). This can produce @@ -124,12 +125,6 @@ Number of significant digits in timestamps (0:S, [3:ms], 6:us). .SH COMMANDS .TP -\fB-ftrace \fIfile\fR -Create HTML output from an existing ftrace file. -.TP -\fB-dmesg \fIfile\fR -Create HTML output from an existing dmesg file. -.TP \fB-summary \fIindir\fR Create a summary page of all tests in \fIindir\fR. Creates summary.html in the current folder. The output page is a table of tests with @@ -146,6 +141,9 @@ with any options you intend to use to see if they will work. \fB-fpdt\fR Print out the contents of the ACPI Firmware Performance Data Table. .TP +\fB-sysinfo\fR +Print out system info extracted from BIOS. Reads /dev/mem directly instead of going through dmidecode. +.TP \fB-usbtopo\fR Print out the current USB topology with power info. .TP @@ -162,9 +160,16 @@ with -fadd they will also be checked. \fB-flistall\fR Print all ftrace functions capable of being captured. These are all the possible values you can add to trace via the -fadd argument. +.SS "rebuild" +.TP +\fB-ftrace \fIfile\fR +Create HTML output from an existing ftrace file. +.TP +\fB-dmesg \fIfile\fR +Create HTML output from an existing dmesg file. .SH EXAMPLES -.SS "Simple Commands" +.SS "simple commands" Check which suspend modes are currently supported. .IP \f(CW$ sleepgraph -modes\fR @@ -185,12 +190,8 @@ Generate a summary of all timelines in a particular folder. .IP \f(CW$ sleepgraph -summary ~/workspace/myresults/\fR .PP -Re-generate the html output from a previous run's dmesg and ftrace log. -.IP -\f(CW$ sleepgraph -dmesg myhost_mem_dmesg.txt -ftrace myhost_mem_ftrace.txt\fR -.PP -.SS "Capturing Simple Timelines" +.SS "capturing basic timelines" Execute a mem suspend with a 15 second wakeup. Include the logs in the html. .IP \f(CW$ sudo sleepgraph -rtcwake 15 -addlogs\fR @@ -204,7 +205,7 @@ Execute a freeze with no wakeup (require keypress). Change output folder name. \f(CW$ sudo sleepgraph -m freeze -rtcwake off -o "freeze-{hostname}-{date}-{time}"\fR .PP -.SS "Capturing Advanced Timelines" +.SS "capturing advanced timelines" Execute a suspend & include dev mode source calls, limit callbacks to 5ms or larger. .IP \f(CW$ sudo sleepgraph -m mem -rtcwake 15 -dev -mindev 5\fR @@ -222,8 +223,7 @@ Execute a suspend using a custom command. \f(CW$ sudo sleepgraph -cmd "echo mem > /sys/power/state" -rtcwake 15\fR .PP - -.SS "Capturing Timelines with Callgraph Data" +.SS "adding callgraph data" Add device callgraphs. Limit the trace depth and only show callgraphs 10ms or larger. .IP \f(CW$ sudo sleepgraph -m mem -rtcwake 15 -f -maxdepth 5 -mincg 10\fR @@ -235,6 +235,16 @@ Capture a full callgraph across all suspend, then filter the html by a single ph \f(CW$ sleepgraph -dmesg host_mem_dmesg.txt -ftrace host_mem_ftrace.txt -f -cgphase resume .PP +.SS "rebuild timeline from logs" +.PP +Rebuild the html from a previous run's logs, using the same options. +.IP +\f(CW$ sleepgraph -dmesg dmesg.txt -ftrace ftrace.txt -callgraph\fR +.PP +Rebuild the html with different options. +.IP +\f(CW$ sleepgraph -dmesg dmesg.txt -ftrace ftrace.txt -addlogs -srgap\fR + .SH "SEE ALSO" dmesg(1) .PP diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 1e8b6116ba3c..9dc8f078a83c 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -1,7 +1,7 @@ ifneq ($(O),) ifeq ($(origin O), command line) - dummy := $(if $(shell test -d $(O) || echo $(O)),$(error O=$(O) does not exist),) - ABSOLUTE_O := $(shell cd $(O) ; pwd) + ABSOLUTE_O := $(realpath $(O)) + dummy := $(if $(ABSOLUTE_O),,$(error O=$(O) does not exist)) OUTPUT := $(ABSOLUTE_O)/$(if $(subdir),$(subdir)/) COMMAND_O := O=$(ABSOLUTE_O) ifeq ($(objtree),) @@ -12,7 +12,7 @@ endif # check that the output directory actually exists ifneq ($(OUTPUT),) -OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd) +OUTDIR := $(realpath $(OUTPUT)) $(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist)) endif diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 4c2fa98ef39d..bef419d4266d 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -1527,9 +1527,6 @@ static void nfit_test1_setup(struct nfit_test *t) set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en); - set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en); - set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en); - set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en); } static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa, @@ -1546,8 +1543,8 @@ static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa, else { memcpy(iobuf, mmio->addr.base + dpa, len); - /* give us some some coverage of the mmio_flush_range() API */ - mmio_flush_range(mmio->addr.base + dpa, len); + /* give us some some coverage of the arch_invalidate_pmem() API */ + arch_invalidate_pmem(mmio->addr.base + dpa, len); } nd_region_release_lane(nd_region, lane); diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 26ce4f7168be..ff805643b5f7 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -52,6 +52,10 @@ override LDFLAGS = override MAKEFLAGS = endif +ifneq ($(KBUILD_SRC),) +override LDFLAGS = +endif + BUILD := $(O) ifndef BUILD BUILD := $(KBUILD_OUTPUT) @@ -62,32 +66,32 @@ endif export BUILD all: - for TARGET in $(TARGETS); do \ + @for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ mkdir $$BUILD_TARGET -p; \ make OUTPUT=$$BUILD_TARGET -C $$TARGET;\ done; run_tests: all - for TARGET in $(TARGETS); do \ + @for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ make OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\ done; hotplug: - for TARGET in $(TARGETS_HOTPLUG); do \ + @for TARGET in $(TARGETS_HOTPLUG); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ make OUTPUT=$$BUILD_TARGET -C $$TARGET;\ done; run_hotplug: hotplug - for TARGET in $(TARGETS_HOTPLUG); do \ + @for TARGET in $(TARGETS_HOTPLUG); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ make OUTPUT=$$BUILD_TARGET -C $$TARGET run_full_test;\ done; clean_hotplug: - for TARGET in $(TARGETS_HOTPLUG); do \ + @for TARGET in $(TARGETS_HOTPLUG); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\ done; @@ -103,7 +107,7 @@ install: ifdef INSTALL_PATH @# Ask all targets to install their files mkdir -p $(INSTALL_PATH) - for TARGET in $(TARGETS); do \ + @for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ make OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \ done; @@ -128,7 +132,7 @@ else endif clean: - for TARGET in $(TARGETS); do \ + @for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\ done; diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 153c3a181a4c..f4b23d697448 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -15,9 +15,9 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test test_align TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ - test_pkt_md_access.o + test_pkt_md_access.o test_xdp_redirect.o sockmap_parse_prog.o sockmap_verdict_prog.o -TEST_PROGS := test_kmod.sh +TEST_PROGS := test_kmod.sh test_xdp_redirect.sh include ../lib.mk diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index d50ac342dc92..36fb9161b34a 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -38,6 +38,8 @@ static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) = (void *) BPF_FUNC_clone_redirect; static int (*bpf_redirect)(int ifindex, int flags) = (void *) BPF_FUNC_redirect; +static int (*bpf_redirect_map)(void *map, int key, int flags) = + (void *) BPF_FUNC_redirect_map; static int (*bpf_perf_event_output)(void *ctx, void *map, unsigned long long flags, void *data, int size) = @@ -63,6 +65,12 @@ static int (*bpf_xdp_adjust_head)(void *ctx, int offset) = static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval, int optlen) = (void *) BPF_FUNC_setsockopt; +static int (*bpf_sk_redirect_map)(void *map, int key, int flags) = + (void *) BPF_FUNC_sk_redirect_map; +static int (*bpf_sock_map_update)(void *map, void *key, void *value, + unsigned long long flags) = + (void *) BPF_FUNC_sock_map_update; + /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions @@ -85,6 +93,7 @@ struct bpf_map_def { unsigned int max_entries; unsigned int map_flags; unsigned int inner_map_idx; + unsigned int numa_node; }; static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) = diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h index 20ecbaa0d85d..6c53a8906eff 100644 --- a/tools/testing/selftests/bpf/bpf_util.h +++ b/tools/testing/selftests/bpf/bpf_util.h @@ -12,6 +12,7 @@ static inline unsigned int bpf_num_possible_cpus(void) unsigned int start, end, possible_cpus = 0; char buff[128]; FILE *fp; + int n; fp = fopen(fcpu, "r"); if (!fp) { @@ -20,17 +21,17 @@ static inline unsigned int bpf_num_possible_cpus(void) } while (fgets(buff, sizeof(buff), fp)) { - if (sscanf(buff, "%u-%u", &start, &end) == 2) { - possible_cpus = start == 0 ? end + 1 : 0; - break; + n = sscanf(buff, "%u-%u", &start, &end); + if (n == 0) { + printf("Failed to retrieve # possible CPUs!\n"); + exit(1); + } else if (n == 1) { + end = start; } + possible_cpus = start == 0 ? end + 1 : 0; + break; } - fclose(fp); - if (!possible_cpus) { - printf("Failed to retrieve # possible CPUs!\n"); - exit(1); - } return possible_cpus; } diff --git a/tools/testing/selftests/bpf/sockmap_parse_prog.c b/tools/testing/selftests/bpf/sockmap_parse_prog.c new file mode 100644 index 000000000000..fae3b96c3aa4 --- /dev/null +++ b/tools/testing/selftests/bpf/sockmap_parse_prog.c @@ -0,0 +1,38 @@ +#include <linux/bpf.h> +#include "bpf_helpers.h" +#include "bpf_util.h" +#include "bpf_endian.h" + +int _version SEC("version") = 1; + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +SEC("sk_skb1") +int bpf_prog1(struct __sk_buff *skb) +{ + void *data_end = (void *)(long) skb->data_end; + void *data = (void *)(long) skb->data; + __u32 lport = skb->local_port; + __u32 rport = skb->remote_port; + __u8 *d = data; + + if (data + 10 > data_end) + return skb->len; + + /* This write/read is a bit pointless but tests the verifier and + * strparser handler for read/write pkt data and access into sk + * fields. + */ + d[7] = 1; + + bpf_printk("parse: data[0] = (%u): local_port %i remote %i\n", + d[0], lport, bpf_ntohl(rport)); + return skb->len; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/sockmap_verdict_prog.c new file mode 100644 index 000000000000..9b99bd10807d --- /dev/null +++ b/tools/testing/selftests/bpf/sockmap_verdict_prog.c @@ -0,0 +1,68 @@ +#include <linux/bpf.h> +#include "bpf_helpers.h" +#include "bpf_util.h" +#include "bpf_endian.h" + +int _version SEC("version") = 1; + +#define bpf_printk(fmt, ...) \ +({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ +}) + +struct bpf_map_def SEC("maps") sock_map_rx = { + .type = BPF_MAP_TYPE_SOCKMAP, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 20, +}; + +struct bpf_map_def SEC("maps") sock_map_tx = { + .type = BPF_MAP_TYPE_SOCKMAP, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 20, +}; + +struct bpf_map_def SEC("maps") sock_map_break = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 20, +}; + +SEC("sk_skb2") +int bpf_prog2(struct __sk_buff *skb) +{ + void *data_end = (void *)(long) skb->data_end; + void *data = (void *)(long) skb->data; + __u32 lport = skb->local_port; + __u32 rport = skb->remote_port; + __u8 *d = data; + __u8 sk, map; + + if (data + 8 > data_end) + return SK_DROP; + + map = d[0]; + sk = d[1]; + + d[0] = 0xd; + d[1] = 0xe; + d[2] = 0xa; + d[3] = 0xd; + d[4] = 0xb; + d[5] = 0xe; + d[6] = 0xe; + d[7] = 0xf; + + bpf_printk("verdict: data[0] = redir(%u:%u)\n", map, sk); + + if (!map) + return bpf_sk_redirect_map(&sock_map_rx, sk, 0); + return bpf_sk_redirect_map(&sock_map_tx, sk, 0); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c index 29793694cbc7..8591c89c0828 100644 --- a/tools/testing/selftests/bpf/test_align.c +++ b/tools/testing/selftests/bpf/test_align.c @@ -27,6 +27,11 @@ #define MAX_INSNS 512 #define MAX_MATCHES 16 +struct bpf_reg_match { + unsigned int line; + const char *match; +}; + struct bpf_align_test { const char *descr; struct bpf_insn insns[MAX_INSNS]; @@ -36,10 +41,14 @@ struct bpf_align_test { REJECT } result; enum bpf_prog_type prog_type; - const char *matches[MAX_MATCHES]; + /* Matches must be in order of increasing line */ + struct bpf_reg_match matches[MAX_MATCHES]; }; static struct bpf_align_test tests[] = { + /* Four tests of known constants. These aren't staggeringly + * interesting since we track exact values now. + */ { .descr = "mov", .insns = { @@ -53,11 +62,13 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - "1: R1=ctx R3=imm2,min_value=2,max_value=2,min_align=2 R10=fp", - "2: R1=ctx R3=imm4,min_value=4,max_value=4,min_align=4 R10=fp", - "3: R1=ctx R3=imm8,min_value=8,max_value=8,min_align=8 R10=fp", - "4: R1=ctx R3=imm16,min_value=16,max_value=16,min_align=16 R10=fp", - "5: R1=ctx R3=imm32,min_value=32,max_value=32,min_align=32 R10=fp", + {1, "R1=ctx(id=0,off=0,imm=0)"}, + {1, "R10=fp0"}, + {1, "R3=inv2"}, + {2, "R3=inv4"}, + {3, "R3=inv8"}, + {4, "R3=inv16"}, + {5, "R3=inv32"}, }, }, { @@ -79,17 +90,19 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - "1: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R10=fp", - "2: R1=ctx R3=imm2,min_value=2,max_value=2,min_align=2 R10=fp", - "3: R1=ctx R3=imm4,min_value=4,max_value=4,min_align=4 R10=fp", - "4: R1=ctx R3=imm8,min_value=8,max_value=8,min_align=8 R10=fp", - "5: R1=ctx R3=imm16,min_value=16,max_value=16,min_align=16 R10=fp", - "6: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R10=fp", - "7: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm32,min_value=32,max_value=32,min_align=32 R10=fp", - "8: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm16,min_value=16,max_value=16,min_align=16 R10=fp", - "9: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm8,min_value=8,max_value=8,min_align=8 R10=fp", - "10: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm4,min_value=4,max_value=4,min_align=4 R10=fp", - "11: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm2,min_value=2,max_value=2,min_align=2 R10=fp", + {1, "R1=ctx(id=0,off=0,imm=0)"}, + {1, "R10=fp0"}, + {1, "R3=inv1"}, + {2, "R3=inv2"}, + {3, "R3=inv4"}, + {4, "R3=inv8"}, + {5, "R3=inv16"}, + {6, "R3=inv1"}, + {7, "R4=inv32"}, + {8, "R4=inv16"}, + {9, "R4=inv8"}, + {10, "R4=inv4"}, + {11, "R4=inv2"}, }, }, { @@ -106,12 +119,14 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - "1: R1=ctx R3=imm4,min_value=4,max_value=4,min_align=4 R10=fp", - "2: R1=ctx R3=imm8,min_value=8,max_value=8,min_align=4 R10=fp", - "3: R1=ctx R3=imm10,min_value=10,max_value=10,min_align=2 R10=fp", - "4: R1=ctx R3=imm10,min_value=10,max_value=10,min_align=2 R4=imm8,min_value=8,max_value=8,min_align=8 R10=fp", - "5: R1=ctx R3=imm10,min_value=10,max_value=10,min_align=2 R4=imm12,min_value=12,max_value=12,min_align=4 R10=fp", - "6: R1=ctx R3=imm10,min_value=10,max_value=10,min_align=2 R4=imm14,min_value=14,max_value=14,min_align=2 R10=fp", + {1, "R1=ctx(id=0,off=0,imm=0)"}, + {1, "R10=fp0"}, + {1, "R3=inv4"}, + {2, "R3=inv8"}, + {3, "R3=inv10"}, + {4, "R4=inv8"}, + {5, "R4=inv12"}, + {6, "R4=inv14"}, }, }, { @@ -126,13 +141,16 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - "1: R1=ctx R3=imm7,min_value=7,max_value=7,min_align=1 R10=fp", - "2: R1=ctx R3=imm7,min_value=7,max_value=7,min_align=1 R10=fp", - "3: R1=ctx R3=imm14,min_value=14,max_value=14,min_align=2 R10=fp", - "4: R1=ctx R3=imm56,min_value=56,max_value=56,min_align=4 R10=fp", + {1, "R1=ctx(id=0,off=0,imm=0)"}, + {1, "R10=fp0"}, + {1, "R3=inv7"}, + {2, "R3=inv7"}, + {3, "R3=inv14"}, + {4, "R3=inv56"}, }, }, + /* Tests using unknown values */ #define PREP_PKT_POINTERS \ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, \ offsetof(struct __sk_buff, data)), \ @@ -166,17 +184,19 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - "7: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R10=fp", - "8: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv55,min_align=2 R10=fp", - "9: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv54,min_align=4 R10=fp", - "10: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv53,min_align=8 R10=fp", - "11: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv52,min_align=16 R10=fp", - "18: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv56 R10=fp", - "19: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv51,min_align=32 R10=fp", - "20: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv52,min_align=16 R10=fp", - "21: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv53,min_align=8 R10=fp", - "22: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv54,min_align=4 R10=fp", - "23: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv55,min_align=2 R10=fp", + {7, "R0=pkt(id=0,off=8,r=8,imm=0)"}, + {7, "R3=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {8, "R3=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, + {9, "R3=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {10, "R3=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, + {11, "R3=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, + {18, "R3=pkt_end(id=0,off=0,imm=0)"}, + {18, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {19, "R4=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"}, + {20, "R4=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, + {21, "R4=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, + {22, "R4=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {23, "R4=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, }, }, { @@ -197,16 +217,16 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - "7: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R10=fp", - "8: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv56 R10=fp", - "9: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv55,min_align=1 R10=fp", - "10: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv56 R10=fp", - "11: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv54,min_align=2 R10=fp", - "12: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv56 R10=fp", - "13: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv53,min_align=4 R10=fp", - "14: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv56 R10=fp", - "15: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv52,min_align=8 R10=fp", - "16: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv50,min_align=8 R10=fp" + {7, "R3=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {8, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {9, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {10, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {11, "R4=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, + {12, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {13, "R4=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {14, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {15, "R4=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, + {16, "R4=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, }, }, { @@ -237,12 +257,14 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - "4: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R5=pkt(id=0,off=0,r=0) R10=fp", - "5: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R5=pkt(id=0,off=14,r=0) R10=fp", - "6: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R4=pkt(id=0,off=14,r=0) R5=pkt(id=0,off=14,r=0) R10=fp", - "10: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=18) R3=pkt_end R4=inv56 R5=pkt(id=0,off=14,r=18) R10=fp", - "14: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=18) R3=pkt_end R4=inv48 R5=pkt(id=0,off=14,r=18) R10=fp", - "15: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=18) R3=pkt_end R4=inv48 R5=pkt(id=0,off=14,r=18) R10=fp", + {4, "R5=pkt(id=0,off=0,r=0,imm=0)"}, + {5, "R5=pkt(id=0,off=14,r=0,imm=0)"}, + {6, "R4=pkt(id=0,off=14,r=0,imm=0)"}, + {10, "R2=pkt(id=0,off=0,r=18,imm=0)"}, + {10, "R5=pkt(id=0,off=14,r=18,imm=0)"}, + {10, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {14, "R4=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"}, + {15, "R4=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"}, }, }, { @@ -297,62 +319,286 @@ static struct bpf_align_test tests[] = { /* Calculated offset in R6 has unknown value, but known * alignment of 4. */ - "8: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R6=inv54,min_align=4 R10=fp", - - /* Offset is added to packet pointer R5, resulting in known - * auxiliary alignment and offset. + {8, "R2=pkt(id=0,off=0,r=8,imm=0)"}, + {8, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + /* Offset is added to packet pointer R5, resulting in + * known fixed offset, and variable offset from R6. */ - "11: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R5=pkt(id=1,off=0,r=0),aux_off=14,aux_off_align=4 R6=inv54,min_align=4 R10=fp", - + {11, "R5=pkt(id=1,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* At the time the word size load is performed from R5, * it's total offset is NET_IP_ALIGN + reg->off (0) + * reg->aux_off (14) which is 16. Then the variable * offset is considered using reg->aux_off_align which * is 4 and meets the load's requirements. */ - "15: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=pkt(id=1,off=4,r=4),aux_off=14,aux_off_align=4 R5=pkt(id=1,off=0,r=4),aux_off=14,aux_off_align=4 R6=inv54,min_align=4 R10=fp", - - + {15, "R4=pkt(id=1,off=18,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {15, "R5=pkt(id=1,off=14,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Variable offset is added to R5 packet pointer, * resulting in auxiliary alignment of 4. */ - "18: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off=14,aux_off_align=4 R5=pkt(id=2,off=0,r=0),aux_off_align=4 R6=inv54,min_align=4 R10=fp", - + {18, "R5=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Constant offset is added to R5, resulting in * reg->off of 14. */ - "19: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off=14,aux_off_align=4 R5=pkt(id=2,off=14,r=0),aux_off_align=4 R6=inv54,min_align=4 R10=fp", - + {19, "R5=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* At the time the word size load is performed from R5, - * it's total offset is NET_IP_ALIGN + reg->off (14) which - * is 16. Then the variable offset is considered using - * reg->aux_off_align which is 4 and meets the load's - * requirements. + * its total fixed offset is NET_IP_ALIGN + reg->off + * (14) which is 16. Then the variable offset is 4-byte + * aligned, so the total offset is 4-byte aligned and + * meets the load's requirements. */ - "23: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=pkt(id=2,off=18,r=18),aux_off_align=4 R5=pkt(id=2,off=14,r=18),aux_off_align=4 R6=inv54,min_align=4 R10=fp", - + {23, "R4=pkt(id=2,off=18,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {23, "R5=pkt(id=2,off=14,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Constant offset is added to R5 packet pointer, * resulting in reg->off value of 14. */ - "26: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off_align=4 R5=pkt(id=0,off=14,r=8) R6=inv54,min_align=4 R10=fp", - /* Variable offset is added to R5, resulting in an - * auxiliary offset of 14, and an auxiliary alignment of 4. + {26, "R5=pkt(id=0,off=14,r=8"}, + /* Variable offset is added to R5, resulting in a + * variable offset of (4n). */ - "27: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off_align=4 R5=pkt(id=3,off=0,r=0),aux_off=14,aux_off_align=4 R6=inv54,min_align=4 R10=fp", - /* Constant is added to R5 again, setting reg->off to 4. */ - "28: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off_align=4 R5=pkt(id=3,off=4,r=0),aux_off=14,aux_off_align=4 R6=inv54,min_align=4 R10=fp", - /* And once more we add a variable, which causes an accumulation - * of reg->off into reg->aux_off_align, with resulting value of - * 18. The auxiliary alignment stays at 4. + {27, "R5=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + /* Constant is added to R5 again, setting reg->off to 18. */ + {28, "R5=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + /* And once more we add a variable; resulting var_off + * is still (4n), fixed offset is not changed. + * Also, we create a new reg->id. */ - "29: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off_align=4 R5=pkt(id=4,off=0,r=0),aux_off=18,aux_off_align=4 R6=inv54,min_align=4 R10=fp", + {29, "R5=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc))"}, /* At the time the word size load is performed from R5, - * it's total offset is NET_IP_ALIGN + reg->off (0) + - * reg->aux_off (18) which is 20. Then the variable offset - * is considered using reg->aux_off_align which is 4 and meets - * the load's requirements. + * its total fixed offset is NET_IP_ALIGN + reg->off (18) + * which is 20. Then the variable offset is (4n), so + * the total offset is 4-byte aligned and meets the + * load's requirements. + */ + {33, "R4=pkt(id=4,off=22,r=22,umax_value=2040,var_off=(0x0; 0x7fc))"}, + {33, "R5=pkt(id=4,off=18,r=22,umax_value=2040,var_off=(0x0; 0x7fc))"}, + }, + }, + { + .descr = "packet variable offset 2", + .insns = { + /* Create an unknown offset, (4n+2)-aligned */ + LOAD_UNKNOWN(BPF_REG_6), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 14), + /* Add it to the packet pointer */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6), + /* Check bounds and perform a read */ + BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0), + /* Make a (4n) offset from the value we just read */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 0xff), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2), + /* Add it to the packet pointer */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6), + /* Check bounds and perform a read */ + BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .matches = { + /* Calculated offset in R6 has unknown value, but known + * alignment of 4. + */ + {8, "R2=pkt(id=0,off=0,r=8,imm=0)"}, + {8, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + /* Adding 14 makes R6 be (4n+2) */ + {9, "R6=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, + /* Packet pointer has (4n+2) offset */ + {11, "R5=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, + {13, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, + /* At the time the word size load is performed from R5, + * its total fixed offset is NET_IP_ALIGN + reg->off (0) + * which is 2. Then the variable offset is (4n+2), so + * the total offset is 4-byte aligned and meets the + * load's requirements. + */ + {15, "R5=pkt(id=1,off=0,r=4,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, + /* Newly read value in R6 was shifted left by 2, so has + * known alignment of 4. + */ + {18, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + /* Added (4n) to packet pointer's (4n+2) var_off, giving + * another (4n+2). + */ + {19, "R5=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"}, + {21, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"}, + /* At the time the word size load is performed from R5, + * its total fixed offset is NET_IP_ALIGN + reg->off (0) + * which is 2. Then the variable offset is (4n+2), so + * the total offset is 4-byte aligned and meets the + * load's requirements. + */ + {23, "R5=pkt(id=2,off=0,r=4,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"}, + }, + }, + { + .descr = "dubious pointer arithmetic", + .insns = { + PREP_PKT_POINTERS, + BPF_MOV64_IMM(BPF_REG_0, 0), + /* ptr & const => unknown & const */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), + BPF_ALU64_IMM(BPF_AND, BPF_REG_5, 0x40), + /* ptr << const => unknown << const */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_5, 2), + /* We have a (4n) value. Let's make a packet offset + * out of it. First add 14, to make it a (4n+2) + */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14), + /* Then make sure it's nonnegative */ + BPF_JMP_IMM(BPF_JSGE, BPF_REG_5, 0, 1), + BPF_EXIT_INSN(), + /* Add it to packet pointer */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5), + /* Check bounds and perform a read */ + BPF_MOV64_REG(BPF_REG_4, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_6, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .matches = { + {4, "R5=pkt(id=0,off=0,r=0,imm=0)"}, + /* ptr & 0x40 == either 0 or 0x40 */ + {5, "R5=inv(id=0,umax_value=64,var_off=(0x0; 0x40))"}, + /* ptr << 2 == unknown, (4n) */ + {7, "R5=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"}, + /* (4n) + 14 == (4n+2). We blow our bounds, because + * the add could overflow. + */ + {8, "R5=inv(id=0,var_off=(0x2; 0xfffffffffffffffc))"}, + /* Checked s>=0 */ + {10, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, + /* packet pointer + nonnegative (4n+2) */ + {12, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, + {14, "R4=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, + /* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine. + * We checked the bounds, but it might have been able + * to overflow if the packet pointer started in the + * upper half of the address space. + * So we did not get a 'range' on R6, and the access + * attempt will fail. + */ + {16, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, + } + }, + { + .descr = "variable subtraction", + .insns = { + /* Create an unknown offset, (4n+2)-aligned */ + LOAD_UNKNOWN(BPF_REG_6), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_6), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 14), + /* Create another unknown, (4n)-aligned, and subtract + * it from the first one + */ + BPF_ALU64_IMM(BPF_LSH, BPF_REG_7, 2), + BPF_ALU64_REG(BPF_SUB, BPF_REG_6, BPF_REG_7), + /* Bounds-check the result */ + BPF_JMP_IMM(BPF_JSGE, BPF_REG_6, 0, 1), + BPF_EXIT_INSN(), + /* Add it to the packet pointer */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6), + /* Check bounds and perform a read */ + BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .matches = { + /* Calculated offset in R6 has unknown value, but known + * alignment of 4. + */ + {7, "R2=pkt(id=0,off=0,r=8,imm=0)"}, + {9, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + /* Adding 14 makes R6 be (4n+2) */ + {10, "R6=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, + /* New unknown value in R7 is (4n) */ + {11, "R7=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + /* Subtracting it from R6 blows our unsigned bounds */ + {12, "R6=inv(id=0,smin_value=-1006,smax_value=1034,var_off=(0x2; 0xfffffffffffffffc))"}, + /* Checked s>= 0 */ + {14, "R6=inv(id=0,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"}, + /* At the time the word size load is performed from R5, + * its total fixed offset is NET_IP_ALIGN + reg->off (0) + * which is 2. Then the variable offset is (4n+2), so + * the total offset is 4-byte aligned and meets the + * load's requirements. + */ + {20, "R5=pkt(id=1,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"}, + }, + }, + { + .descr = "pointer variable subtraction", + .insns = { + /* Create an unknown offset, (4n+2)-aligned and bounded + * to [14,74] + */ + LOAD_UNKNOWN(BPF_REG_6), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_6), + BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 0xf), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 14), + /* Subtract it from the packet pointer */ + BPF_MOV64_REG(BPF_REG_5, BPF_REG_2), + BPF_ALU64_REG(BPF_SUB, BPF_REG_5, BPF_REG_6), + /* Create another unknown, (4n)-aligned and >= 74. + * That in fact means >= 76, since 74 % 4 == 2 */ - "33: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=pkt(id=4,off=4,r=4),aux_off=18,aux_off_align=4 R5=pkt(id=4,off=0,r=4),aux_off=18,aux_off_align=4 R6=inv54,min_align=4 R10=fp", + BPF_ALU64_IMM(BPF_LSH, BPF_REG_7, 2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 76), + /* Add it to the packet pointer */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_7), + /* Check bounds and perform a read */ + BPF_MOV64_REG(BPF_REG_4, BPF_REG_5), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .matches = { + /* Calculated offset in R6 has unknown value, but known + * alignment of 4. + */ + {7, "R2=pkt(id=0,off=0,r=8,imm=0)"}, + {10, "R6=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"}, + /* Adding 14 makes R6 be (4n+2) */ + {11, "R6=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"}, + /* Subtracting from packet pointer overflows ubounds */ + {13, "R5=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c))"}, + /* New unknown value in R7 is (4n), >= 76 */ + {15, "R7=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"}, + /* Adding it to packet pointer gives nice bounds again */ + {16, "R5=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"}, + /* At the time the word size load is performed from R5, + * its total fixed offset is NET_IP_ALIGN + reg->off (0) + * which is 2. Then the variable offset is (4n+2), so + * the total offset is 4-byte aligned and meets the + * load's requirements. + */ + {20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"}, }, }, }; @@ -373,6 +619,9 @@ static int do_test_single(struct bpf_align_test *test) { struct bpf_insn *prog = test->insns; int prog_type = test->prog_type; + char bpf_vlog_copy[32768]; + const char *line_ptr; + int cur_line = -1; int prog_len, i; int fd_prog; int ret; @@ -381,26 +630,49 @@ static int do_test_single(struct bpf_align_test *test) fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, prog, prog_len, 1, "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 2); - if (fd_prog < 0) { + if (fd_prog < 0 && test->result != REJECT) { printf("Failed to load program.\n"); printf("%s", bpf_vlog); ret = 1; + } else if (fd_prog >= 0 && test->result == REJECT) { + printf("Unexpected success to load!\n"); + printf("%s", bpf_vlog); + ret = 1; + close(fd_prog); } else { ret = 0; + /* We make a local copy so that we can strtok() it */ + strncpy(bpf_vlog_copy, bpf_vlog, sizeof(bpf_vlog_copy)); + line_ptr = strtok(bpf_vlog_copy, "\n"); for (i = 0; i < MAX_MATCHES; i++) { - const char *t, *m = test->matches[i]; + struct bpf_reg_match m = test->matches[i]; - if (!m) + if (!m.match) break; - t = strstr(bpf_vlog, m); - if (!t) { - printf("Failed to find match: %s\n", m); + while (line_ptr) { + cur_line = -1; + sscanf(line_ptr, "%u: ", &cur_line); + if (cur_line == m.line) + break; + line_ptr = strtok(NULL, "\n"); + } + if (!line_ptr) { + printf("Failed to find line %u for match: %s\n", + m.line, m.match); + ret = 1; + printf("%s", bpf_vlog); + break; + } + if (!strstr(line_ptr, m.match)) { + printf("Failed to find match %u: %s\n", + m.line, m.match); ret = 1; printf("%s", bpf_vlog); break; } } - close(fd_prog); + if (fd_prog >= 0) + close(fd_prog); } return ret; } diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 79601c81e169..fe3a443a1102 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -22,6 +22,7 @@ #include <linux/bpf.h> #include <bpf/bpf.h> +#include <bpf/libbpf.h> #include "bpf_util.h" static int map_flags; @@ -438,6 +439,444 @@ static void test_arraymap_percpu_many_keys(void) close(fd); } +static void test_devmap(int task, void *data) +{ + int fd; + __u32 key, value; + + fd = bpf_create_map(BPF_MAP_TYPE_DEVMAP, sizeof(key), sizeof(value), + 2, 0); + if (fd < 0) { + printf("Failed to create arraymap '%s'!\n", strerror(errno)); + exit(1); + } + + close(fd); +} + +#include <sys/socket.h> +#include <sys/ioctl.h> +#include <arpa/inet.h> +#include <sys/select.h> +#include <linux/err.h> +#define SOCKMAP_PARSE_PROG "./sockmap_parse_prog.o" +#define SOCKMAP_VERDICT_PROG "./sockmap_verdict_prog.o" +static void test_sockmap(int tasks, void *data) +{ + int one = 1, map_fd_rx, map_fd_tx, map_fd_break, s, sc, rc; + struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_break; + int ports[] = {50200, 50201, 50202, 50204}; + int err, i, fd, sfd[6] = {0xdeadbeef}; + u8 buf[20] = {0x0, 0x5, 0x3, 0x2, 0x1, 0x0}; + int parse_prog, verdict_prog; + struct sockaddr_in addr; + struct bpf_object *obj; + struct timeval to; + __u32 key, value; + pid_t pid[tasks]; + fd_set w; + + /* Create some sockets to use with sockmap */ + for (i = 0; i < 2; i++) { + sfd[i] = socket(AF_INET, SOCK_STREAM, 0); + if (sfd[i] < 0) + goto out; + err = setsockopt(sfd[i], SOL_SOCKET, SO_REUSEADDR, + (char *)&one, sizeof(one)); + if (err) { + printf("failed to setsockopt\n"); + goto out; + } + err = ioctl(sfd[i], FIONBIO, (char *)&one); + if (err < 0) { + printf("failed to ioctl\n"); + goto out; + } + memset(&addr, 0, sizeof(struct sockaddr_in)); + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = inet_addr("127.0.0.1"); + addr.sin_port = htons(ports[i]); + err = bind(sfd[i], (struct sockaddr *)&addr, sizeof(addr)); + if (err < 0) { + printf("failed to bind: err %i: %i:%i\n", + err, i, sfd[i]); + goto out; + } + err = listen(sfd[i], 32); + if (err < 0) { + printf("failed to listen\n"); + goto out; + } + } + + for (i = 2; i < 4; i++) { + sfd[i] = socket(AF_INET, SOCK_STREAM, 0); + if (sfd[i] < 0) + goto out; + err = setsockopt(sfd[i], SOL_SOCKET, SO_REUSEADDR, + (char *)&one, sizeof(one)); + if (err) { + printf("set sock opt\n"); + goto out; + } + memset(&addr, 0, sizeof(struct sockaddr_in)); + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = inet_addr("127.0.0.1"); + addr.sin_port = htons(ports[i - 2]); + err = connect(sfd[i], (struct sockaddr *)&addr, sizeof(addr)); + if (err) { + printf("failed to connect\n"); + goto out; + } + } + + + for (i = 4; i < 6; i++) { + sfd[i] = accept(sfd[i - 4], NULL, NULL); + if (sfd[i] < 0) { + printf("accept failed\n"); + goto out; + } + } + + /* Test sockmap with connected sockets */ + fd = bpf_create_map(BPF_MAP_TYPE_SOCKMAP, + sizeof(key), sizeof(value), + 6, 0); + if (fd < 0) { + printf("Failed to create sockmap %i\n", fd); + goto out_sockmap; + } + + /* Test update without programs */ + for (i = 0; i < 6; i++) { + err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY); + if (err) { + printf("Failed noprog update sockmap '%i:%i'\n", + i, sfd[i]); + goto out_sockmap; + } + } + + /* Test attaching/detaching bad fds */ + err = bpf_prog_attach(-1, fd, BPF_SK_SKB_STREAM_PARSER, 0); + if (!err) { + printf("Failed invalid parser prog attach\n"); + goto out_sockmap; + } + + err = bpf_prog_attach(-1, fd, BPF_SK_SKB_STREAM_VERDICT, 0); + if (!err) { + printf("Failed invalid verdict prog attach\n"); + goto out_sockmap; + } + + err = bpf_prog_attach(-1, fd, __MAX_BPF_ATTACH_TYPE, 0); + if (!err) { + printf("Failed unknown prog attach\n"); + goto out_sockmap; + } + + err = bpf_prog_detach(fd, BPF_SK_SKB_STREAM_PARSER); + if (err) { + printf("Failed empty parser prog detach\n"); + goto out_sockmap; + } + + err = bpf_prog_detach(fd, BPF_SK_SKB_STREAM_VERDICT); + if (err) { + printf("Failed empty verdict prog detach\n"); + goto out_sockmap; + } + + err = bpf_prog_detach(fd, __MAX_BPF_ATTACH_TYPE); + if (!err) { + printf("Detach invalid prog successful\n"); + goto out_sockmap; + } + + /* Load SK_SKB program and Attach */ + err = bpf_prog_load(SOCKMAP_PARSE_PROG, + BPF_PROG_TYPE_SK_SKB, &obj, &parse_prog); + if (err) { + printf("Failed to load SK_SKB parse prog\n"); + goto out_sockmap; + } + + err = bpf_prog_load(SOCKMAP_VERDICT_PROG, + BPF_PROG_TYPE_SK_SKB, &obj, &verdict_prog); + if (err) { + printf("Failed to load SK_SKB verdict prog\n"); + goto out_sockmap; + } + + bpf_map_rx = bpf_object__find_map_by_name(obj, "sock_map_rx"); + if (IS_ERR(bpf_map_rx)) { + printf("Failed to load map rx from verdict prog\n"); + goto out_sockmap; + } + + map_fd_rx = bpf_map__fd(bpf_map_rx); + if (map_fd_rx < 0) { + printf("Failed to get map fd\n"); + goto out_sockmap; + } + + bpf_map_tx = bpf_object__find_map_by_name(obj, "sock_map_tx"); + if (IS_ERR(bpf_map_tx)) { + printf("Failed to load map tx from verdict prog\n"); + goto out_sockmap; + } + + map_fd_tx = bpf_map__fd(bpf_map_tx); + if (map_fd_tx < 0) { + printf("Failed to get map tx fd\n"); + goto out_sockmap; + } + + bpf_map_break = bpf_object__find_map_by_name(obj, "sock_map_break"); + if (IS_ERR(bpf_map_break)) { + printf("Failed to load map tx from verdict prog\n"); + goto out_sockmap; + } + + map_fd_break = bpf_map__fd(bpf_map_break); + if (map_fd_break < 0) { + printf("Failed to get map tx fd\n"); + goto out_sockmap; + } + + err = bpf_prog_attach(parse_prog, map_fd_break, + BPF_SK_SKB_STREAM_PARSER, 0); + if (!err) { + printf("Allowed attaching SK_SKB program to invalid map\n"); + goto out_sockmap; + } + + err = bpf_prog_attach(parse_prog, map_fd_rx, + BPF_SK_SKB_STREAM_PARSER, 0); + if (err) { + printf("Failed stream parser bpf prog attach\n"); + goto out_sockmap; + } + + err = bpf_prog_attach(verdict_prog, map_fd_rx, + BPF_SK_SKB_STREAM_VERDICT, 0); + if (err) { + printf("Failed stream verdict bpf prog attach\n"); + goto out_sockmap; + } + + err = bpf_prog_attach(verdict_prog, map_fd_rx, + __MAX_BPF_ATTACH_TYPE, 0); + if (!err) { + printf("Attached unknown bpf prog\n"); + goto out_sockmap; + } + + /* Test map update elem afterwards fd lives in fd and map_fd */ + for (i = 0; i < 6; i++) { + err = bpf_map_update_elem(map_fd_rx, &i, &sfd[i], BPF_ANY); + if (err) { + printf("Failed map_fd_rx update sockmap %i '%i:%i'\n", + err, i, sfd[i]); + goto out_sockmap; + } + err = bpf_map_update_elem(map_fd_tx, &i, &sfd[i], BPF_ANY); + if (err) { + printf("Failed map_fd_tx update sockmap %i '%i:%i'\n", + err, i, sfd[i]); + goto out_sockmap; + } + } + + /* Test map delete elem and remove send/recv sockets */ + for (i = 2; i < 4; i++) { + err = bpf_map_delete_elem(map_fd_rx, &i); + if (err) { + printf("Failed delete sockmap rx %i '%i:%i'\n", + err, i, sfd[i]); + goto out_sockmap; + } + err = bpf_map_delete_elem(map_fd_tx, &i); + if (err) { + printf("Failed delete sockmap tx %i '%i:%i'\n", + err, i, sfd[i]); + goto out_sockmap; + } + } + + /* Test map send/recv */ + for (i = 0; i < 2; i++) { + buf[0] = i; + buf[1] = 0x5; + sc = send(sfd[2], buf, 20, 0); + if (sc < 0) { + printf("Failed sockmap send\n"); + goto out_sockmap; + } + + FD_ZERO(&w); + FD_SET(sfd[3], &w); + to.tv_sec = 1; + to.tv_usec = 0; + s = select(sfd[3] + 1, &w, NULL, NULL, &to); + if (s == -1) { + perror("Failed sockmap select()"); + goto out_sockmap; + } else if (!s) { + printf("Failed sockmap unexpected timeout\n"); + goto out_sockmap; + } + + if (!FD_ISSET(sfd[3], &w)) { + printf("Failed sockmap select/recv\n"); + goto out_sockmap; + } + + rc = recv(sfd[3], buf, sizeof(buf), 0); + if (rc < 0) { + printf("Failed sockmap recv\n"); + goto out_sockmap; + } + } + + /* Negative null entry lookup from datapath should be dropped */ + buf[0] = 1; + buf[1] = 12; + sc = send(sfd[2], buf, 20, 0); + if (sc < 0) { + printf("Failed sockmap send\n"); + goto out_sockmap; + } + + /* Push fd into same slot */ + i = 2; + err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_NOEXIST); + if (!err) { + printf("Failed allowed sockmap dup slot BPF_NOEXIST\n"); + goto out_sockmap; + } + + err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY); + if (err) { + printf("Failed sockmap update new slot BPF_ANY\n"); + goto out_sockmap; + } + + err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_EXIST); + if (err) { + printf("Failed sockmap update new slot BPF_EXIST\n"); + goto out_sockmap; + } + + /* Delete the elems without programs */ + for (i = 0; i < 6; i++) { + err = bpf_map_delete_elem(fd, &i); + if (err) { + printf("Failed delete sockmap %i '%i:%i'\n", + err, i, sfd[i]); + } + } + + /* Test having multiple maps open and set with programs on same fds */ + err = bpf_prog_attach(parse_prog, fd, + BPF_SK_SKB_STREAM_PARSER, 0); + if (err) { + printf("Failed fd bpf parse prog attach\n"); + goto out_sockmap; + } + err = bpf_prog_attach(verdict_prog, fd, + BPF_SK_SKB_STREAM_VERDICT, 0); + if (err) { + printf("Failed fd bpf verdict prog attach\n"); + goto out_sockmap; + } + + for (i = 4; i < 6; i++) { + err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY); + if (!err) { + printf("Failed allowed duplicate programs in update ANY sockmap %i '%i:%i'\n", + err, i, sfd[i]); + goto out_sockmap; + } + err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_NOEXIST); + if (!err) { + printf("Failed allowed duplicate program in update NOEXIST sockmap %i '%i:%i'\n", + err, i, sfd[i]); + goto out_sockmap; + } + err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_EXIST); + if (!err) { + printf("Failed allowed duplicate program in update EXIST sockmap %i '%i:%i'\n", + err, i, sfd[i]); + goto out_sockmap; + } + } + + /* Test tasks number of forked operations */ + for (i = 0; i < tasks; i++) { + pid[i] = fork(); + if (pid[i] == 0) { + for (i = 0; i < 6; i++) { + bpf_map_delete_elem(map_fd_tx, &i); + bpf_map_delete_elem(map_fd_rx, &i); + bpf_map_update_elem(map_fd_tx, &i, + &sfd[i], BPF_ANY); + bpf_map_update_elem(map_fd_rx, &i, + &sfd[i], BPF_ANY); + } + exit(0); + } else if (pid[i] == -1) { + printf("Couldn't spawn #%d process!\n", i); + exit(1); + } + } + + for (i = 0; i < tasks; i++) { + int status; + + assert(waitpid(pid[i], &status, 0) == pid[i]); + assert(status == 0); + } + + err = bpf_prog_detach(map_fd_rx, __MAX_BPF_ATTACH_TYPE); + if (!err) { + printf("Detached an invalid prog type.\n"); + goto out_sockmap; + } + + err = bpf_prog_detach(map_fd_rx, BPF_SK_SKB_STREAM_PARSER); + if (err) { + printf("Failed parser prog detach\n"); + goto out_sockmap; + } + + err = bpf_prog_detach(map_fd_rx, BPF_SK_SKB_STREAM_VERDICT); + if (err) { + printf("Failed parser prog detach\n"); + goto out_sockmap; + } + + /* Test map close sockets */ + for (i = 0; i < 6; i++) + close(sfd[i]); + close(fd); + close(map_fd_rx); + bpf_object__close(obj); + return; +out: + for (i = 0; i < 6; i++) + close(sfd[i]); + printf("Failed to create sockmap '%i:%s'!\n", i, strerror(errno)); + exit(1); +out_sockmap: + for (i = 0; i < 6; i++) + close(sfd[i]); + close(fd); + exit(1); +} + #define MAP_SIZE (32 * 1024) static void test_map_large(void) @@ -605,6 +1044,9 @@ static void run_all_tests(void) test_arraymap_percpu_many_keys(); + test_devmap(0, NULL); + test_sockmap(0, NULL); + test_map_large(); test_map_parallel(); test_map_stress(); diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 1f7dd35551b9..11ee25cea227 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -75,39 +75,6 @@ static struct { __ret; \ }) -static int bpf_prog_load(const char *file, enum bpf_prog_type type, - struct bpf_object **pobj, int *prog_fd) -{ - struct bpf_program *prog; - struct bpf_object *obj; - int err; - - obj = bpf_object__open(file); - if (IS_ERR(obj)) { - error_cnt++; - return -ENOENT; - } - - prog = bpf_program__next(NULL, obj); - if (!prog) { - bpf_object__close(obj); - error_cnt++; - return -ENOENT; - } - - bpf_program__set_type(prog, type); - err = bpf_object__load(obj); - if (err) { - bpf_object__close(obj); - error_cnt++; - return -EINVAL; - } - - *pobj = obj; - *prog_fd = bpf_program__fd(prog); - return 0; -} - static int bpf_find_map(const char *test, struct bpf_object *obj, const char *name) { @@ -130,8 +97,10 @@ static void test_pkt_access(void) int err, prog_fd; err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); - if (err) + if (err) { + error_cnt++; return; + } err = bpf_prog_test_run(prog_fd, 100000, &pkt_v4, sizeof(pkt_v4), NULL, NULL, &retval, &duration); @@ -162,8 +131,10 @@ static void test_xdp(void) int err, prog_fd, map_fd; err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); - if (err) + if (err) { + error_cnt++; return; + } map_fd = bpf_find_map(__func__, obj, "vip2tnl"); if (map_fd < 0) @@ -223,8 +194,10 @@ static void test_l4lb(void) u32 *magic = (u32 *)buf; err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); - if (err) + if (err) { + error_cnt++; return; + } map_fd = bpf_find_map(__func__, obj, "vip_map"); if (map_fd < 0) @@ -280,8 +253,10 @@ static void test_tcp_estats(void) err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); CHECK(err, "", "err %d errno %d\n", err, errno); - if (err) + if (err) { + error_cnt++; return; + } bpf_object__close(obj); } @@ -304,7 +279,7 @@ static void test_bpf_obj_id(void) /* +1 to test for the info_len returned by kernel */ struct bpf_prog_info prog_infos[nr_iters + 1]; struct bpf_map_info map_infos[nr_iters + 1]; - char jited_insns[128], xlated_insns[128]; + char jited_insns[128], xlated_insns[128], zeros[128]; __u32 i, next_id, info_len, nr_id_found, duration = 0; int sysctl_fd, jit_enabled = 0, err = 0; __u64 array_value; @@ -330,17 +305,22 @@ static void test_bpf_obj_id(void) objs[i] = NULL; /* Check bpf_obj_get_info_by_fd() */ + bzero(zeros, sizeof(zeros)); for (i = 0; i < nr_iters; i++) { err = bpf_prog_load(file, BPF_PROG_TYPE_SOCKET_FILTER, &objs[i], &prog_fds[i]); /* test_obj_id.o is a dumb prog. It should never fail * to load. */ + if (err) + error_cnt++; assert(!err); /* Check getting prog info */ info_len = sizeof(struct bpf_prog_info) * 2; bzero(&prog_infos[i], info_len); + bzero(jited_insns, sizeof(jited_insns)); + bzero(xlated_insns, sizeof(xlated_insns)); prog_infos[i].jited_prog_insns = ptr_to_u64(jited_insns); prog_infos[i].jited_prog_len = sizeof(jited_insns); prog_infos[i].xlated_prog_insns = ptr_to_u64(xlated_insns); @@ -351,15 +331,20 @@ static void test_bpf_obj_id(void) prog_infos[i].type != BPF_PROG_TYPE_SOCKET_FILTER || info_len != sizeof(struct bpf_prog_info) || (jit_enabled && !prog_infos[i].jited_prog_len) || - !prog_infos[i].xlated_prog_len, + (jit_enabled && + !memcmp(jited_insns, zeros, sizeof(zeros))) || + !prog_infos[i].xlated_prog_len || + !memcmp(xlated_insns, zeros, sizeof(zeros)), "get-prog-info(fd)", - "err %d errno %d i %d type %d(%d) info_len %u(%lu) jit_enabled %d jited_prog_len %u xlated_prog_len %u\n", + "err %d errno %d i %d type %d(%d) info_len %u(%lu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d\n", err, errno, i, prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER, info_len, sizeof(struct bpf_prog_info), jit_enabled, prog_infos[i].jited_prog_len, - prog_infos[i].xlated_prog_len)) + prog_infos[i].xlated_prog_len, + !!memcmp(jited_insns, zeros, sizeof(zeros)), + !!memcmp(xlated_insns, zeros, sizeof(zeros)))) goto done; map_fds[i] = bpf_find_map(__func__, objs[i], "test_map_id"); @@ -496,8 +481,10 @@ static void test_pkt_md_access(void) int err, prog_fd; err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd); - if (err) + if (err) { + error_cnt++; return; + } err = bpf_prog_test_run(prog_fd, 10, &pkt_v4, sizeof(pkt_v4), NULL, NULL, &retval, &duration); diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index d3ed7324105e..26f3250bdcd2 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -422,7 +422,7 @@ static struct bpf_test tests[] = { BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R1 pointer arithmetic", + .errstr_unpriv = "R1 subtraction from stack pointer", .result_unpriv = REJECT, .errstr = "R1 invalid mem access", .result = REJECT, @@ -604,8 +604,9 @@ static struct bpf_test tests[] = { BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, -4), BPF_EXIT_INSN(), }, - .errstr = "misaligned access", + .errstr = "misaligned stack access", .result = REJECT, + .flags = F_LOAD_WITH_STRICT_ALIGNMENT, }, { "invalid map_fd for function call", @@ -651,8 +652,9 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr = "misaligned access", + .errstr = "misaligned value access", .result = REJECT, + .flags = F_LOAD_WITH_STRICT_ALIGNMENT, }, { "sometimes access memory with incorrect alignment", @@ -673,6 +675,7 @@ static struct bpf_test tests[] = { .errstr = "R0 invalid mem access", .errstr_unpriv = "R0 leaks addr", .result = REJECT, + .flags = F_LOAD_WITH_STRICT_ALIGNMENT, }, { "jump test 1", @@ -964,6 +967,256 @@ static struct bpf_test tests[] = { .result = REJECT, }, { + "invalid access __sk_buff family", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, family)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "invalid access __sk_buff remote_ip4", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, remote_ip4)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "invalid access __sk_buff local_ip4", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, local_ip4)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "invalid access __sk_buff remote_ip6", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, remote_ip6)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "invalid access __sk_buff local_ip6", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, local_ip6)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "invalid access __sk_buff remote_port", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, remote_port)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "invalid access __sk_buff remote_port", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, local_port)), + BPF_EXIT_INSN(), + }, + .errstr = "invalid bpf_context access", + .result = REJECT, + }, + { + "valid access __sk_buff family", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, family)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, + }, + { + "valid access __sk_buff remote_ip4", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, remote_ip4)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, + }, + { + "valid access __sk_buff local_ip4", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, local_ip4)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, + }, + { + "valid access __sk_buff remote_ip6", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, remote_ip6[0])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, remote_ip6[1])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, remote_ip6[2])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, remote_ip6[3])), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, + }, + { + "valid access __sk_buff local_ip6", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, local_ip6[0])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, local_ip6[1])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, local_ip6[2])), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, local_ip6[3])), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, + }, + { + "valid access __sk_buff remote_port", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, remote_port)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, + }, + { + "valid access __sk_buff remote_port", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, local_port)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, + }, + { + "invalid access of tc_classid for SK_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, tc_classid)), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SK_SKB, + .errstr = "invalid bpf_context access", + }, + { + "check skb->mark is writeable by SK_SKB", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, + }, + { + "check skb->tc_index is writeable by SK_SKB", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, tc_index)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, + }, + { + "check skb->priority is writeable by SK_SKB", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, + offsetof(struct __sk_buff, priority)), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, + }, + { + "direct packet read for SK_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, + }, + { + "direct packet write for SK_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, + }, + { + "overlapping checks for direct packet access SK_SKB", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SK_SKB, + }, + { "check skb->mark is not writeable by sockets", .insns = { BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, @@ -1216,8 +1469,9 @@ static struct bpf_test tests[] = { offsetof(struct __sk_buff, cb[0]) + 1), BPF_EXIT_INSN(), }, - .errstr = "misaligned access", + .errstr = "misaligned context access", .result = REJECT, + .flags = F_LOAD_WITH_STRICT_ALIGNMENT, }, { "check __sk_buff->hash, offset 0, half store not permitted", @@ -1320,8 +1574,9 @@ static struct bpf_test tests[] = { offsetof(struct __sk_buff, cb[0]) + 2), BPF_EXIT_INSN(), }, - .errstr = "misaligned access", + .errstr = "misaligned context access", .result = REJECT, + .flags = F_LOAD_WITH_STRICT_ALIGNMENT, }, { "check cb access: word, unaligned 2", @@ -1331,8 +1586,9 @@ static struct bpf_test tests[] = { offsetof(struct __sk_buff, cb[4]) + 1), BPF_EXIT_INSN(), }, - .errstr = "misaligned access", + .errstr = "misaligned context access", .result = REJECT, + .flags = F_LOAD_WITH_STRICT_ALIGNMENT, }, { "check cb access: word, unaligned 3", @@ -1342,8 +1598,9 @@ static struct bpf_test tests[] = { offsetof(struct __sk_buff, cb[4]) + 2), BPF_EXIT_INSN(), }, - .errstr = "misaligned access", + .errstr = "misaligned context access", .result = REJECT, + .flags = F_LOAD_WITH_STRICT_ALIGNMENT, }, { "check cb access: word, unaligned 4", @@ -1353,8 +1610,9 @@ static struct bpf_test tests[] = { offsetof(struct __sk_buff, cb[4]) + 3), BPF_EXIT_INSN(), }, - .errstr = "misaligned access", + .errstr = "misaligned context access", .result = REJECT, + .flags = F_LOAD_WITH_STRICT_ALIGNMENT, }, { "check cb access: double", @@ -1380,8 +1638,9 @@ static struct bpf_test tests[] = { offsetof(struct __sk_buff, cb[1])), BPF_EXIT_INSN(), }, - .errstr = "misaligned access", + .errstr = "misaligned context access", .result = REJECT, + .flags = F_LOAD_WITH_STRICT_ALIGNMENT, }, { "check cb access: double, unaligned 2", @@ -1391,8 +1650,9 @@ static struct bpf_test tests[] = { offsetof(struct __sk_buff, cb[3])), BPF_EXIT_INSN(), }, - .errstr = "misaligned access", + .errstr = "misaligned context access", .result = REJECT, + .flags = F_LOAD_WITH_STRICT_ALIGNMENT, }, { "check cb access: double, oob 1", @@ -1524,7 +1784,8 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "misaligned access off -6 size 8", + .errstr = "misaligned stack access off (0x0; 0x0)+-8+2 size 8", + .flags = F_LOAD_WITH_STRICT_ALIGNMENT, }, { "PTR_TO_STACK store/load - bad alignment on reg", @@ -1536,7 +1797,8 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "misaligned access off -2 size 8", + .errstr = "misaligned stack access off (0x0; 0x0)+-10+8 size 8", + .flags = F_LOAD_WITH_STRICT_ALIGNMENT, }, { "PTR_TO_STACK store/load - out of bounds low", @@ -1580,8 +1842,6 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, - .result_unpriv = REJECT, - .errstr_unpriv = "R1 pointer arithmetic", }, { "unpriv: add pointer to pointer", @@ -1592,7 +1852,7 @@ static struct bpf_test tests[] = { }, .result = ACCEPT, .result_unpriv = REJECT, - .errstr_unpriv = "R1 pointer arithmetic", + .errstr_unpriv = "R1 pointer += pointer", }, { "unpriv: neg pointer", @@ -1933,10 +2193,7 @@ static struct bpf_test tests[] = { BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8), BPF_EXIT_INSN(), }, - .errstr_unpriv = "pointer arithmetic prohibited", - .result_unpriv = REJECT, - .errstr = "R1 invalid mem access", - .result = REJECT, + .result = ACCEPT, }, { "unpriv: cmp of stack pointer", @@ -2000,7 +2257,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "invalid stack type R3", + .errstr = "R4 min value is negative", .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { @@ -2017,7 +2274,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "invalid stack type R3", + .errstr = "R4 min value is negative", .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { @@ -2219,7 +2476,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "invalid stack type R3 off=-1 access_size=-1", + .errstr = "R4 min value is negative", .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { @@ -2236,7 +2493,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "invalid stack type R3 off=-1 access_size=2147483647", + .errstr = "R4 unbounded memory access, use 'var &= const' or 'if (var < const)'", .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { @@ -2253,7 +2510,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "invalid stack type R3 off=-512 access_size=2147483647", + .errstr = "R4 unbounded memory access, use 'var &= const' or 'if (var < const)'", .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { @@ -2324,8 +2581,8 @@ static struct bpf_test tests[] = { offsetof(struct __sk_buff, data)), BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_4), BPF_MOV64_REG(BPF_REG_2, BPF_REG_1), - BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 48), - BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 48), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 49), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 49), BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2), BPF_MOV64_REG(BPF_REG_2, BPF_REG_3), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8), @@ -2653,7 +2910,7 @@ static struct bpf_test tests[] = { BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), BPF_JMP_A(-6), }, - .errstr = "misaligned packet access off 2+15+-4 size 4", + .errstr = "misaligned packet access off 2+(0x0; 0x0)+15+-4 size 4", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .flags = F_LOAD_WITH_STRICT_ALIGNMENT, @@ -2704,11 +2961,11 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0xffffffff), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), - BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffff), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0x7fff), BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2), BPF_MOV64_REG(BPF_REG_5, BPF_REG_4), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xffff - 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0x7fff - 1), BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1), BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0), BPF_MOV64_IMM(BPF_REG_0, 0), @@ -2730,10 +2987,10 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_4, 0xffffffff), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -8), BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), - BPF_ALU64_IMM(BPF_AND, BPF_REG_4, 0xffff), + BPF_ALU64_IMM(BPF_AND, BPF_REG_4, 0x7fff), BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2), BPF_MOV64_REG(BPF_REG_5, BPF_REG_4), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xffff - 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0x7fff - 1), BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1), BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0), BPF_MOV64_IMM(BPF_REG_0, 0), @@ -2759,7 +3016,7 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_4, 0xffffffff), BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_4, -8), BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), - BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 48), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 49), BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2), BPF_MOV64_REG(BPF_REG_0, BPF_REG_4), BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2), @@ -2796,7 +3053,7 @@ static struct bpf_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "cannot add integer value with 47 upper zero bits to ptr_to_packet", + .errstr = "invalid access to packet, off=0 size=8, R5(id=1,off=0,r=0)", }, { "direct packet access: test24 (x += pkt_ptr, 5)", @@ -2814,7 +3071,7 @@ static struct bpf_test tests[] = { BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4), BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), BPF_MOV64_REG(BPF_REG_5, BPF_REG_0), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0xffff - 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x7fff - 1), BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_0, 0), BPF_MOV64_IMM(BPF_REG_0, 0), @@ -2824,6 +3081,79 @@ static struct bpf_test tests[] = { .result = ACCEPT, }, { + "direct packet access: test25 (marking on <, good access)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, -4), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "direct packet access: test26 (marking on <, bad access)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_3, 3), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JA, 0, 0, -3), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "direct packet access: test27 (marking on <=, good access)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_0, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "direct packet access: test28 (marking on <=, bad access)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, -4), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { "helper access to packet: test1, valid packet_ptr range", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, @@ -3113,7 +3443,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { - "helper access to packet: test14, cls helper fail sub", + "helper access to packet: test14, cls helper ok sub", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, offsetof(struct __sk_buff, data)), @@ -3133,12 +3463,36 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test15, cls helper fail sub", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 12), + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, .result = REJECT, - .errstr = "type=inv expected=fp", + .errstr = "invalid access to packet", .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { - "helper access to packet: test15, cls helper fail range 1", + "helper access to packet: test16, cls helper fail range 1", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, offsetof(struct __sk_buff, data)), @@ -3163,7 +3517,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { - "helper access to packet: test16, cls helper fail range 2", + "helper access to packet: test17, cls helper fail range 2", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, offsetof(struct __sk_buff, data)), @@ -3184,11 +3538,11 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "invalid access to packet", + .errstr = "R2 min value is negative", .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { - "helper access to packet: test17, cls helper fail range 3", + "helper access to packet: test18, cls helper fail range 3", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, offsetof(struct __sk_buff, data)), @@ -3209,11 +3563,11 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = REJECT, - .errstr = "invalid access to packet", + .errstr = "R2 min value is negative", .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { - "helper access to packet: test18, cls helper fail range zero", + "helper access to packet: test19, cls helper fail range zero", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, offsetof(struct __sk_buff, data)), @@ -3238,7 +3592,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { - "helper access to packet: test19, pkt end as input", + "helper access to packet: test20, pkt end as input", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, offsetof(struct __sk_buff, data)), @@ -3263,7 +3617,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { - "helper access to packet: test20, wrong reg", + "helper access to packet: test21, wrong reg", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, offsetof(struct __sk_buff, data)), @@ -3323,7 +3677,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, - .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr_unpriv = "R0 leaks addr", .result_unpriv = REJECT, .result = ACCEPT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -3347,7 +3701,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, - .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr_unpriv = "R0 leaks addr", .result_unpriv = REJECT, .result = ACCEPT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -3375,7 +3729,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, - .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr_unpriv = "R0 leaks addr", .result_unpriv = REJECT, .result = ACCEPT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -3416,9 +3770,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, - .errstr_unpriv = "R0 pointer arithmetic prohibited", .errstr = "R0 min value is outside of the array range", - .result_unpriv = REJECT, .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, @@ -3440,9 +3792,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, - .errstr_unpriv = "R0 pointer arithmetic prohibited", - .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", - .result_unpriv = REJECT, + .errstr = "R0 unbounded memory access, make sure to bounds check any array access into a map", .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, @@ -3456,7 +3806,7 @@ static struct bpf_test tests[] = { BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES), BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1), BPF_MOV32_IMM(BPF_REG_1, 0), @@ -3467,8 +3817,8 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, - .errstr_unpriv = "R0 pointer arithmetic prohibited", - .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", + .errstr_unpriv = "R0 leaks addr", + .errstr = "R0 unbounded memory access", .result_unpriv = REJECT, .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -3494,7 +3844,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, - .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr_unpriv = "R0 leaks addr", .errstr = "invalid access to map value, value_size=48 off=44 size=8", .result_unpriv = REJECT, .result = REJECT, @@ -3524,8 +3874,8 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map2 = { 3, 11 }, - .errstr_unpriv = "R0 pointer arithmetic prohibited", - .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", + .errstr_unpriv = "R0 pointer += pointer", + .errstr = "R0 invalid mem access 'inv'", .result_unpriv = REJECT, .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -3667,34 +4017,6 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS }, { - "multiple registers share map_lookup_elem bad reg type", - .insns = { - BPF_MOV64_IMM(BPF_REG_1, 10), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), - BPF_MOV64_REG(BPF_REG_4, BPF_REG_0), - BPF_MOV64_REG(BPF_REG_5, BPF_REG_0), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_MOV64_IMM(BPF_REG_1, 1), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), - BPF_MOV64_IMM(BPF_REG_1, 2), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0, 1), - BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 0), - BPF_MOV64_IMM(BPF_REG_1, 3), - BPF_EXIT_INSN(), - }, - .fixup_map1 = { 4 }, - .result = REJECT, - .errstr = "R3 invalid mem access 'inv'", - .prog_type = BPF_PROG_TYPE_SCHED_CLS - }, - { "invalid map access from else condition", .insns = { BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), @@ -3712,9 +4034,9 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, - .errstr = "R0 unbounded memory access, make sure to bounds check any array access into a map", + .errstr = "R0 unbounded memory access", .result = REJECT, - .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr_unpriv = "R0 leaks addr", .result_unpriv = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, @@ -4092,7 +4414,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, - .errstr = "invalid access to map value, value_size=48 off=0 size=-8", + .errstr = "R2 min value is negative", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -4158,7 +4480,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, - .errstr = "R1 min value is outside of the array range", + .errstr = "invalid access to map value, value_size=48 off=4 size=0", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -4204,7 +4526,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, - .errstr = "invalid access to map value, value_size=48 off=4 size=-8", + .errstr = "R2 min value is negative", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -4226,7 +4548,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, - .errstr = "R1 min value is outside of the array range", + .errstr = "R2 min value is negative", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -4342,7 +4664,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, - .errstr = "invalid access to map value, value_size=48 off=4 size=-8", + .errstr = "R2 min value is negative", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -4365,7 +4687,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, - .errstr = "R1 min value is outside of the array range", + .errstr = "R2 min value is negative", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -4453,13 +4775,13 @@ static struct bpf_test tests[] = { BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), - BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_2, 1), BPF_MOV64_IMM(BPF_REG_3, 0), BPF_EMIT_CALL(BPF_FUNC_probe_read), BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, - .errstr = "R1 min value is negative, either use unsigned index or do a if (index >=0) check", + .errstr = "R1 unbounded memory access", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -4490,6 +4812,246 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, { + "helper access to map: bounds check using <, good access", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JLT, BPF_REG_3, 32, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to map: bounds check using <, bad access", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JLT, BPF_REG_3, 32, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .result = REJECT, + .errstr = "R1 unbounded memory access", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to map: bounds check using <=, good access", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JLE, BPF_REG_3, 32, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to map: bounds check using <=, bad access", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JLE, BPF_REG_3, 32, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .result = REJECT, + .errstr = "R1 unbounded memory access", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to map: bounds check using s<, good access", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 32, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 0, -3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to map: bounds check using s<, good access 2", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 32, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, -3, -3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to map: bounds check using s<, bad access", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 32, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, -3, -3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .result = REJECT, + .errstr = "R1 min value is negative", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to map: bounds check using s<=, good access", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 32, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 0, -3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to map: bounds check using s<=, good access 2", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 32, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, -3, -3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "helper access to map: bounds check using s<=, bad access", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 32, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, -3, -3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .result = REJECT, + .errstr = "R1 min value is negative", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { "map element value is preserved across register spilling", .insns = { BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), @@ -4579,7 +5141,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, - .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr_unpriv = "R0 leaks addr", .result = ACCEPT, .result_unpriv = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -4607,7 +5169,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, - .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr_unpriv = "R0 leaks addr", .result = ACCEPT, .result_unpriv = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -4626,7 +5188,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, - .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr_unpriv = "R0 bitwise operator &= on pointer", .errstr = "invalid mem access 'inv'", .result = REJECT, .result_unpriv = REJECT, @@ -4645,7 +5207,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, - .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr_unpriv = "R0 32-bit pointer arithmetic prohibited", .errstr = "invalid mem access 'inv'", .result = REJECT, .result_unpriv = REJECT, @@ -4664,7 +5226,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, - .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr_unpriv = "R0 pointer arithmetic with /= operator", .errstr = "invalid mem access 'inv'", .result = REJECT, .result_unpriv = REJECT, @@ -4707,10 +5269,8 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, - .errstr_unpriv = "R0 invalid mem access 'inv'", .errstr = "R0 invalid mem access 'inv'", .result = REJECT, - .result_unpriv = REJECT, }, { "map element value is preserved across register spilling", @@ -4732,7 +5292,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, - .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr_unpriv = "R0 leaks addr", .result = ACCEPT, .result_unpriv = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -4914,7 +5474,8 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "R2 unbounded memory access", + /* because max wasn't checked, signed min is negative */ + .errstr = "R2 min value is negative, either use unsigned or 'var &= const'", .result = REJECT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, @@ -5063,6 +5624,20 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_TRACEPOINT, }, { + "helper access to variable memory: size = 0 allowed on NULL", + .insns = { + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_EMIT_CALL(BPF_FUNC_csum_diff), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { "helper access to variable memory: size > 0 not allowed on NULL", .insns = { BPF_MOV64_IMM(BPF_REG_1, 0), @@ -5076,7 +5651,7 @@ static struct bpf_test tests[] = { BPF_EMIT_CALL(BPF_FUNC_csum_diff), BPF_EXIT_INSN(), }, - .errstr = "R1 type=imm expected=fp", + .errstr = "R1 type=inv expected=fp", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, @@ -5161,7 +5736,7 @@ static struct bpf_test tests[] = { BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), - BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), BPF_ALU64_IMM(BPF_AND, BPF_REG_1, -4), BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2), BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), @@ -5170,10 +5745,8 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, - .errstr_unpriv = "R0 pointer arithmetic prohibited", - .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", + .errstr = "R0 max value is outside of the array range", .result = REJECT, - .result_unpriv = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { @@ -5202,10 +5775,8 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map2 = { 3 }, - .errstr_unpriv = "R0 pointer arithmetic prohibited", - .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", + .errstr = "R0 max value is outside of the array range", .result = REJECT, - .result_unpriv = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { @@ -5252,7 +5823,7 @@ static struct bpf_test tests[] = { }, .fixup_map_in_map = { 3 }, .errstr = "R1 type=inv expected=map_ptr", - .errstr_unpriv = "R1 pointer arithmetic prohibited", + .errstr_unpriv = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited", .result = REJECT, }, { @@ -5532,10 +6103,8 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr_unpriv = "R0 pointer arithmetic prohibited", .errstr = "R0 min value is negative", .result = REJECT, - .result_unpriv = REJECT, }, { "bounds checks mixing signed and unsigned", @@ -5558,10 +6127,8 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr_unpriv = "R0 pointer arithmetic prohibited", .errstr = "R0 min value is negative", .result = REJECT, - .result_unpriv = REJECT, }, { "bounds checks mixing signed and unsigned, variant 2", @@ -5586,10 +6153,8 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr_unpriv = "R0 pointer arithmetic prohibited", .errstr = "R8 invalid mem access 'inv'", .result = REJECT, - .result_unpriv = REJECT, }, { "bounds checks mixing signed and unsigned, variant 3", @@ -5613,10 +6178,8 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr_unpriv = "R0 pointer arithmetic prohibited", .errstr = "R8 invalid mem access 'inv'", .result = REJECT, - .result_unpriv = REJECT, }, { "bounds checks mixing signed and unsigned, variant 4", @@ -5639,10 +6202,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr_unpriv = "R0 pointer arithmetic prohibited", - .errstr = "R0 min value is negative", - .result = REJECT, - .result_unpriv = REJECT, + .result = ACCEPT, }, { "bounds checks mixing signed and unsigned, variant 5", @@ -5666,10 +6226,8 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr_unpriv = "R0 pointer arithmetic prohibited", - .errstr = "R0 invalid mem access", + .errstr = "R0 min value is negative", .result = REJECT, - .result_unpriv = REJECT, }, { "bounds checks mixing signed and unsigned, variant 6", @@ -5690,10 +6248,8 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R4 min value is negative, either use unsigned", .errstr = "R4 min value is negative, either use unsigned", .result = REJECT, - .result_unpriv = REJECT, }, { "bounds checks mixing signed and unsigned, variant 7", @@ -5716,10 +6272,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr_unpriv = "R0 pointer arithmetic prohibited", - .errstr = "R0 min value is negative", - .result = REJECT, - .result_unpriv = REJECT, + .result = ACCEPT, }, { "bounds checks mixing signed and unsigned, variant 8", @@ -5730,32 +6283,6 @@ static struct bpf_test tests[] = { BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), - BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), - BPF_MOV64_IMM(BPF_REG_2, 1024 * 1024 * 1024 + 1), - BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 3), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2), - BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), - BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map1 = { 3 }, - .errstr_unpriv = "R0 pointer arithmetic prohibited", - .errstr = "R0 min value is negative", - .result = REJECT, - .result_unpriv = REJECT, - }, - { - "bounds checks mixing signed and unsigned, variant 9", - .insns = { - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, - BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8), BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), @@ -5770,13 +6297,11 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr_unpriv = "R0 pointer arithmetic prohibited", .errstr = "R0 min value is negative", .result = REJECT, - .result_unpriv = REJECT, }, { - "bounds checks mixing signed and unsigned, variant 10", + "bounds checks mixing signed and unsigned, variant 9", .insns = { BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), @@ -5798,13 +6323,10 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr_unpriv = "R0 pointer arithmetic prohibited", - .errstr = "R0 min value is negative", - .result = REJECT, - .result_unpriv = REJECT, + .result = ACCEPT, }, { - "bounds checks mixing signed and unsigned, variant 11", + "bounds checks mixing signed and unsigned, variant 10", .insns = { BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), @@ -5826,13 +6348,11 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr_unpriv = "R0 pointer arithmetic prohibited", .errstr = "R0 min value is negative", .result = REJECT, - .result_unpriv = REJECT, }, { - "bounds checks mixing signed and unsigned, variant 12", + "bounds checks mixing signed and unsigned, variant 11", .insns = { BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), @@ -5855,13 +6375,11 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr_unpriv = "R0 pointer arithmetic prohibited", .errstr = "R0 min value is negative", .result = REJECT, - .result_unpriv = REJECT, }, { - "bounds checks mixing signed and unsigned, variant 13", + "bounds checks mixing signed and unsigned, variant 12", .insns = { BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), @@ -5883,13 +6401,11 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr_unpriv = "R0 pointer arithmetic prohibited", .errstr = "R0 min value is negative", .result = REJECT, - .result_unpriv = REJECT, }, { - "bounds checks mixing signed and unsigned, variant 14", + "bounds checks mixing signed and unsigned, variant 13", .insns = { BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), @@ -5914,13 +6430,11 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr_unpriv = "R0 pointer arithmetic prohibited", .errstr = "R0 min value is negative", .result = REJECT, - .result_unpriv = REJECT, }, { - "bounds checks mixing signed and unsigned, variant 15", + "bounds checks mixing signed and unsigned, variant 14", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1, offsetof(struct __sk_buff, mark)), @@ -5946,13 +6460,11 @@ static struct bpf_test tests[] = { BPF_JMP_IMM(BPF_JA, 0, 0, -7), }, .fixup_map1 = { 4 }, - .errstr_unpriv = "R0 pointer arithmetic prohibited", .errstr = "R0 min value is negative", .result = REJECT, - .result_unpriv = REJECT, }, { - "bounds checks mixing signed and unsigned, variant 16", + "bounds checks mixing signed and unsigned, variant 15", .insns = { BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), @@ -5976,13 +6488,13 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr_unpriv = "R0 pointer comparison prohibited", .errstr = "R0 min value is negative", .result = REJECT, .result_unpriv = REJECT, }, { - "subtraction bounds (map value)", + "subtraction bounds (map value) variant 1", .insns = { BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), @@ -6004,10 +6516,134 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .fixup_map1 = { 3 }, - .errstr_unpriv = "R0 pointer arithmetic prohibited", + .errstr = "R0 max value is outside of the array range", + .result = REJECT, + }, + { + "subtraction bounds (map value) variant 2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 0xff, 6), + BPF_LDX_MEM(BPF_B, BPF_REG_3, BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JGT, BPF_REG_3, 0xff, 4), + BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", .result = REJECT, + }, + { + "variable-offset ctx access", + .insns = { + /* Get an unknown value */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), + /* Make it small and 4-byte aligned */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4), + /* add it to skb. We now have either &skb->len or + * &skb->pkt_type, but we don't know which + */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2), + /* dereference it */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .errstr = "variable ctx access var_off=(0x0; 0x4)", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_LWT_IN, + }, + { + "variable-offset stack access", + .insns = { + /* Fill the top 8 bytes of the stack */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + /* Get an unknown value */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), + /* Make it small and 4-byte aligned */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8), + /* add it to fp. We now have either fp-4 or fp-8, but + * we don't know which + */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), + /* dereference it */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0), + BPF_EXIT_INSN(), + }, + .errstr = "variable stack access var_off=(0xfffffffffffffff8; 0x4)", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_LWT_IN, + }, + { + "liveness pruning and write screening", + .insns = { + /* Get an unknown value */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), + /* branch conditions teach us nothing about R2 */ + BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R0 !read_ok", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_LWT_IN, + }, + { + "varlen_map_value_access pruning", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES), + BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, + offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 3 }, + .errstr_unpriv = "R0 leaks addr", + .errstr = "R0 unbounded memory access", .result_unpriv = REJECT, + .result = REJECT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, + }, + { + "invalid 64-bit BPF_END", + .insns = { + BPF_MOV32_IMM(BPF_REG_0, 0), + { + .code = BPF_ALU64 | BPF_END | BPF_TO_LE, + .dst_reg = BPF_REG_0, + .src_reg = 0, + .off = 0, + .imm = 32, + }, + BPF_EXIT_INSN(), + }, + .errstr = "BPF_END uses reserved fields", + .result = REJECT, }, }; diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.c b/tools/testing/selftests/bpf/test_xdp_redirect.c new file mode 100644 index 000000000000..ef9e704be140 --- /dev/null +++ b/tools/testing/selftests/bpf/test_xdp_redirect.c @@ -0,0 +1,28 @@ +/* Copyright (c) 2017 VMware + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include <linux/bpf.h> +#include "bpf_helpers.h" + +int _version SEC("version") = 1; + +SEC("redirect_to_111") +int xdp_redirect_to_111(struct xdp_md *xdp) +{ + return bpf_redirect(111, 0); +} +SEC("redirect_to_222") +int xdp_redirect_to_222(struct xdp_md *xdp) +{ + return bpf_redirect(222, 0); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh new file mode 100755 index 000000000000..344a3656dea6 --- /dev/null +++ b/tools/testing/selftests/bpf/test_xdp_redirect.sh @@ -0,0 +1,59 @@ +#!/bin/sh +# Create 2 namespaces with two veth peers, and +# forward packets in-between using generic XDP +# +# NS1(veth11) NS2(veth22) +# | | +# | | +# (veth1, ------ (veth2, +# id:111) id:222) +# | xdp forwarding | +# ------------------ + +cleanup() +{ + if [ "$?" = "0" ]; then + echo "selftests: test_xdp_redirect [PASS]"; + else + echo "selftests: test_xdp_redirect [FAILED]"; + fi + + set +e + ip netns del ns1 2> /dev/null + ip netns del ns2 2> /dev/null +} + +ip link set dev lo xdpgeneric off 2>/dev/null > /dev/null +if [ $? -ne 0 ];then + echo "selftests: [SKIP] Could not run test without the ip xdpgeneric support" + exit 0 +fi +set -e + +ip netns add ns1 +ip netns add ns2 + +trap cleanup 0 2 3 6 9 + +ip link add veth1 index 111 type veth peer name veth11 +ip link add veth2 index 222 type veth peer name veth22 + +ip link set veth11 netns ns1 +ip link set veth22 netns ns2 + +ip link set veth1 up +ip link set veth2 up + +ip netns exec ns1 ip addr add 10.1.1.11/24 dev veth11 +ip netns exec ns2 ip addr add 10.1.1.22/24 dev veth22 + +ip netns exec ns1 ip link set dev veth11 up +ip netns exec ns2 ip link set dev veth22 up + +ip link set dev veth1 xdpgeneric obj test_xdp_redirect.o sec redirect_to_222 +ip link set dev veth2 xdpgeneric obj test_xdp_redirect.o sec redirect_to_111 + +ip netns exec ns1 ping -c 1 10.1.1.22 +ip netns exec ns2 ping -c 1 10.1.1.11 + +exit 0 diff --git a/tools/testing/selftests/breakpoints/Makefile b/tools/testing/selftests/breakpoints/Makefile index 6b214b7b10fb..247b0a1899d7 100644 --- a/tools/testing/selftests/breakpoints/Makefile +++ b/tools/testing/selftests/breakpoints/Makefile @@ -2,14 +2,14 @@ uname_M := $(shell uname -m 2>/dev/null || echo not) ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) +TEST_GEN_PROGS := step_after_suspend_test + ifeq ($(ARCH),x86) -TEST_GEN_PROGS := breakpoint_test +TEST_GEN_PROGS += breakpoint_test endif ifneq (,$(filter $(ARCH),aarch64 arm64)) -TEST_GEN_PROGS := breakpoint_test_arm64 +TEST_GEN_PROGS += breakpoint_test_arm64 endif -TEST_GEN_PROGS += step_after_suspend_test - include ../lib.mk diff --git a/tools/testing/selftests/breakpoints/breakpoint_test.c b/tools/testing/selftests/breakpoints/breakpoint_test.c index f63356151ad4..901b85ea6a59 100644 --- a/tools/testing/selftests/breakpoints/breakpoint_test.c +++ b/tools/testing/selftests/breakpoints/breakpoint_test.c @@ -367,11 +367,11 @@ static void launch_tests(void) /* Icebp traps */ ptrace(PTRACE_CONT, child_pid, NULL, 0); - check_success("Test icebp"); + check_success("Test icebp\n"); /* Int 3 traps */ ptrace(PTRACE_CONT, child_pid, NULL, 0); - check_success("Test int 3 trap"); + check_success("Test int 3 trap\n"); ptrace(PTRACE_CONT, child_pid, NULL, 0); } diff --git a/tools/testing/selftests/capabilities/test_execve.c b/tools/testing/selftests/capabilities/test_execve.c index 763f37fecfb8..cf6778441381 100644 --- a/tools/testing/selftests/capabilities/test_execve.c +++ b/tools/testing/selftests/capabilities/test_execve.c @@ -1,7 +1,6 @@ #define _GNU_SOURCE #include <cap-ng.h> -#include <err.h> #include <linux/capability.h> #include <stdbool.h> #include <string.h> @@ -18,6 +17,8 @@ #include <sys/prctl.h> #include <sys/stat.h> +#include "../kselftest.h" + #ifndef PR_CAP_AMBIENT #define PR_CAP_AMBIENT 47 # define PR_CAP_AMBIENT_IS_SET 1 @@ -27,6 +28,7 @@ #endif static int nerrs; +static pid_t mpid; /* main() pid is used to avoid duplicate test counts */ static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, va_list ap) { @@ -36,29 +38,32 @@ static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, va_list int buf_len; buf_len = vsnprintf(buf, sizeof(buf), fmt, ap); - if (buf_len < 0) { - err(1, "vsnprintf failed"); - } - if (buf_len >= sizeof(buf)) { - errx(1, "vsnprintf output truncated"); - } + if (buf_len < 0) + ksft_exit_fail_msg("vsnprintf failed - %s\n", strerror(errno)); + + if (buf_len >= sizeof(buf)) + ksft_exit_fail_msg("vsnprintf output truncated\n"); + fd = open(filename, O_WRONLY); if (fd < 0) { if ((errno == ENOENT) && enoent_ok) return; - err(1, "open of %s failed", filename); + ksft_exit_fail_msg("open of %s failed - %s\n", + filename, strerror(errno)); } written = write(fd, buf, buf_len); if (written != buf_len) { if (written >= 0) { - errx(1, "short write to %s", filename); + ksft_exit_fail_msg("short write to %s\n", filename); } else { - err(1, "write to %s failed", filename); + ksft_exit_fail_msg("write to %s failed - %s\n", + filename, strerror(errno)); } } if (close(fd) != 0) { - err(1, "close of %s failed", filename); + ksft_exit_fail_msg("close of %s failed - %s\n", + filename, strerror(errno)); } } @@ -95,11 +100,12 @@ static bool create_and_enter_ns(uid_t inner_uid) */ if (unshare(CLONE_NEWNS) == 0) { - printf("[NOTE]\tUsing global UIDs for tests\n"); + ksft_print_msg("[NOTE]\tUsing global UIDs for tests\n"); if (prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0) != 0) - err(1, "PR_SET_KEEPCAPS"); + ksft_exit_fail_msg("PR_SET_KEEPCAPS - %s\n", + strerror(errno)); if (setresuid(inner_uid, inner_uid, -1) != 0) - err(1, "setresuid"); + ksft_exit_fail_msg("setresuid - %s\n", strerror(errno)); // Re-enable effective caps capng_get_caps_process(); @@ -107,22 +113,24 @@ static bool create_and_enter_ns(uid_t inner_uid) if (capng_have_capability(CAPNG_PERMITTED, i)) capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, i); if (capng_apply(CAPNG_SELECT_CAPS) != 0) - err(1, "capng_apply"); + ksft_exit_fail_msg( + "capng_apply - %s\n", strerror(errno)); have_outer_privilege = true; } else if (unshare(CLONE_NEWUSER | CLONE_NEWNS) == 0) { - printf("[NOTE]\tUsing a user namespace for tests\n"); + ksft_print_msg("[NOTE]\tUsing a user namespace for tests\n"); maybe_write_file("/proc/self/setgroups", "deny"); write_file("/proc/self/uid_map", "%d %d 1", inner_uid, outer_uid); write_file("/proc/self/gid_map", "0 %d 1", outer_gid); have_outer_privilege = false; } else { - errx(1, "must be root or be able to create a userns"); + ksft_exit_skip("must be root or be able to create a userns\n"); } if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL) != 0) - err(1, "remount everything private"); + ksft_exit_fail_msg("remount everything private - %s\n", + strerror(errno)); return have_outer_privilege; } @@ -131,20 +139,22 @@ static void chdir_to_tmpfs(void) { char cwd[PATH_MAX]; if (getcwd(cwd, sizeof(cwd)) != cwd) - err(1, "getcwd"); + ksft_exit_fail_msg("getcwd - %s\n", strerror(errno)); if (mount("private_tmp", ".", "tmpfs", 0, "mode=0777") != 0) - err(1, "mount private tmpfs"); + ksft_exit_fail_msg("mount private tmpfs - %s\n", + strerror(errno)); if (chdir(cwd) != 0) - err(1, "chdir to private tmpfs"); + ksft_exit_fail_msg("chdir to private tmpfs - %s\n", + strerror(errno)); } static void copy_fromat_to(int fromfd, const char *fromname, const char *toname) { int from = openat(fromfd, fromname, O_RDONLY); if (from == -1) - err(1, "open copy source"); + ksft_exit_fail_msg("open copy source - %s\n", strerror(errno)); int to = open(toname, O_CREAT | O_WRONLY | O_EXCL, 0700); @@ -154,10 +164,11 @@ static void copy_fromat_to(int fromfd, const char *fromname, const char *toname) if (sz == 0) break; if (sz < 0) - err(1, "read"); + ksft_exit_fail_msg("read - %s\n", strerror(errno)); if (write(to, buf, sz) != sz) - err(1, "write"); /* no short writes on tmpfs */ + /* no short writes on tmpfs */ + ksft_exit_fail_msg("write - %s\n", strerror(errno)); } close(from); @@ -174,18 +185,20 @@ static bool fork_wait(void) int status; if (waitpid(child, &status, 0) != child || !WIFEXITED(status)) { - printf("[FAIL]\tChild died\n"); + ksft_print_msg("Child died\n"); nerrs++; } else if (WEXITSTATUS(status) != 0) { - printf("[FAIL]\tChild failed\n"); + ksft_print_msg("Child failed\n"); nerrs++; } else { - printf("[OK]\tChild succeeded\n"); + /* don't print this message for mpid */ + if (getpid() != mpid) + ksft_test_result_pass("Passed\n"); } - return false; } else { - err(1, "fork"); + ksft_exit_fail_msg("fork - %s\n", strerror(errno)); + return false; } } @@ -195,7 +208,7 @@ static void exec_other_validate_cap(const char *name, execl(name, name, (eff ? "1" : "0"), (perm ? "1" : "0"), (inh ? "1" : "0"), (ambient ? "1" : "0"), NULL); - err(1, "execl"); + ksft_exit_fail_msg("execl - %s\n", strerror(errno)); } static void exec_validate_cap(bool eff, bool perm, bool inh, bool ambient) @@ -209,7 +222,8 @@ static int do_tests(int uid, const char *our_path) int ourpath_fd = open(our_path, O_RDONLY | O_DIRECTORY); if (ourpath_fd == -1) - err(1, "open '%s'", our_path); + ksft_exit_fail_msg("open '%s' - %s\n", + our_path, strerror(errno)); chdir_to_tmpfs(); @@ -221,30 +235,30 @@ static int do_tests(int uid, const char *our_path) copy_fromat_to(ourpath_fd, "validate_cap", "validate_cap_suidroot"); if (chown("validate_cap_suidroot", 0, -1) != 0) - err(1, "chown"); + ksft_exit_fail_msg("chown - %s\n", strerror(errno)); if (chmod("validate_cap_suidroot", S_ISUID | 0700) != 0) - err(1, "chmod"); + ksft_exit_fail_msg("chmod - %s\n", strerror(errno)); copy_fromat_to(ourpath_fd, "validate_cap", "validate_cap_suidnonroot"); if (chown("validate_cap_suidnonroot", uid + 1, -1) != 0) - err(1, "chown"); + ksft_exit_fail_msg("chown - %s\n", strerror(errno)); if (chmod("validate_cap_suidnonroot", S_ISUID | 0700) != 0) - err(1, "chmod"); + ksft_exit_fail_msg("chmod - %s\n", strerror(errno)); copy_fromat_to(ourpath_fd, "validate_cap", "validate_cap_sgidroot"); if (chown("validate_cap_sgidroot", -1, 0) != 0) - err(1, "chown"); + ksft_exit_fail_msg("chown - %s\n", strerror(errno)); if (chmod("validate_cap_sgidroot", S_ISGID | 0710) != 0) - err(1, "chmod"); + ksft_exit_fail_msg("chmod - %s\n", strerror(errno)); copy_fromat_to(ourpath_fd, "validate_cap", "validate_cap_sgidnonroot"); if (chown("validate_cap_sgidnonroot", -1, gid + 1) != 0) - err(1, "chown"); + ksft_exit_fail_msg("chown - %s\n", strerror(errno)); if (chmod("validate_cap_sgidnonroot", S_ISGID | 0710) != 0) - err(1, "chmod"); + ksft_exit_fail_msg("chmod - %s\n", strerror(errno)); } capng_get_caps_process(); @@ -252,147 +266,162 @@ static int do_tests(int uid, const char *our_path) /* Make sure that i starts out clear */ capng_update(CAPNG_DROP, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE); if (capng_apply(CAPNG_SELECT_CAPS) != 0) - err(1, "capng_apply"); + ksft_exit_fail_msg("capng_apply - %s\n", strerror(errno)); if (uid == 0) { - printf("[RUN]\tRoot => ep\n"); + ksft_print_msg("[RUN]\tRoot => ep\n"); if (fork_wait()) exec_validate_cap(true, true, false, false); } else { - printf("[RUN]\tNon-root => no caps\n"); + ksft_print_msg("[RUN]\tNon-root => no caps\n"); if (fork_wait()) exec_validate_cap(false, false, false, false); } - printf("[OK]\tCheck cap_ambient manipulation rules\n"); + ksft_print_msg("Check cap_ambient manipulation rules\n"); /* We should not be able to add ambient caps yet. */ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != -1 || errno != EPERM) { if (errno == EINVAL) - printf("[FAIL]\tPR_CAP_AMBIENT_RAISE isn't supported\n"); + ksft_test_result_fail( + "PR_CAP_AMBIENT_RAISE isn't supported\n"); else - printf("[FAIL]\tPR_CAP_AMBIENT_RAISE should have failed eith EPERM on a non-inheritable cap\n"); + ksft_test_result_fail( + "PR_CAP_AMBIENT_RAISE should have failed eith EPERM on a non-inheritable cap\n"); return 1; } - printf("[OK]\tPR_CAP_AMBIENT_RAISE failed on non-inheritable cap\n"); + ksft_test_result_pass( + "PR_CAP_AMBIENT_RAISE failed on non-inheritable cap\n"); capng_update(CAPNG_ADD, CAPNG_INHERITABLE, CAP_NET_RAW); capng_update(CAPNG_DROP, CAPNG_PERMITTED, CAP_NET_RAW); capng_update(CAPNG_DROP, CAPNG_EFFECTIVE, CAP_NET_RAW); if (capng_apply(CAPNG_SELECT_CAPS) != 0) - err(1, "capng_apply"); + ksft_exit_fail_msg("capng_apply - %s\n", strerror(errno)); if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_RAW, 0, 0, 0) != -1 || errno != EPERM) { - printf("[FAIL]\tPR_CAP_AMBIENT_RAISE should have failed on a non-permitted cap\n"); + ksft_test_result_fail( + "PR_CAP_AMBIENT_RAISE should have failed on a non-permitted cap\n"); return 1; } - printf("[OK]\tPR_CAP_AMBIENT_RAISE failed on non-permitted cap\n"); + ksft_test_result_pass( + "PR_CAP_AMBIENT_RAISE failed on non-permitted cap\n"); capng_update(CAPNG_ADD, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE); if (capng_apply(CAPNG_SELECT_CAPS) != 0) - err(1, "capng_apply"); + ksft_exit_fail_msg("capng_apply - %s\n", strerror(errno)); if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) { - printf("[FAIL]\tPR_CAP_AMBIENT_RAISE should have succeeded\n"); + ksft_test_result_fail( + "PR_CAP_AMBIENT_RAISE should have succeeded\n"); return 1; } - printf("[OK]\tPR_CAP_AMBIENT_RAISE worked\n"); + ksft_test_result_pass("PR_CAP_AMBIENT_RAISE worked\n"); if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != 1) { - printf("[FAIL]\tPR_CAP_AMBIENT_IS_SET is broken\n"); + ksft_test_result_fail("PR_CAP_AMBIENT_IS_SET is broken\n"); return 1; } if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL, 0, 0, 0, 0) != 0) - err(1, "PR_CAP_AMBIENT_CLEAR_ALL"); + ksft_exit_fail_msg("PR_CAP_AMBIENT_CLEAR_ALL - %s\n", + strerror(errno)); if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) { - printf("[FAIL]\tPR_CAP_AMBIENT_CLEAR_ALL didn't work\n"); + ksft_test_result_fail( + "PR_CAP_AMBIENT_CLEAR_ALL didn't work\n"); return 1; } if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) - err(1, "PR_CAP_AMBIENT_RAISE"); + ksft_exit_fail_msg("PR_CAP_AMBIENT_RAISE - %s\n", + strerror(errno)); capng_update(CAPNG_DROP, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE); if (capng_apply(CAPNG_SELECT_CAPS) != 0) - err(1, "capng_apply"); + ksft_exit_fail_msg("capng_apply - %s\n", strerror(errno)); if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) { - printf("[FAIL]\tDropping I should have dropped A\n"); + ksft_test_result_fail("Dropping I should have dropped A\n"); return 1; } - printf("[OK]\tBasic manipulation appears to work\n"); + ksft_test_result_pass("Basic manipulation appears to work\n"); capng_update(CAPNG_ADD, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE); if (capng_apply(CAPNG_SELECT_CAPS) != 0) - err(1, "capng_apply"); + ksft_exit_fail_msg("capng_apply - %s\n", strerror(errno)); if (uid == 0) { - printf("[RUN]\tRoot +i => eip\n"); + ksft_print_msg("[RUN]\tRoot +i => eip\n"); if (fork_wait()) exec_validate_cap(true, true, true, false); } else { - printf("[RUN]\tNon-root +i => i\n"); + ksft_print_msg("[RUN]\tNon-root +i => i\n"); if (fork_wait()) exec_validate_cap(false, false, true, false); } if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) - err(1, "PR_CAP_AMBIENT_RAISE"); + ksft_exit_fail_msg("PR_CAP_AMBIENT_RAISE - %s\n", + strerror(errno)); - printf("[RUN]\tUID %d +ia => eipa\n", uid); + ksft_print_msg("[RUN]\tUID %d +ia => eipa\n", uid); if (fork_wait()) exec_validate_cap(true, true, true, true); /* The remaining tests need real privilege */ if (!have_outer_privilege) { - printf("[SKIP]\tSUID/SGID tests (needs privilege)\n"); + ksft_test_result_skip("SUID/SGID tests (needs privilege)\n"); goto done; } if (uid == 0) { - printf("[RUN]\tRoot +ia, suidroot => eipa\n"); + ksft_print_msg("[RUN]\tRoot +ia, suidroot => eipa\n"); if (fork_wait()) exec_other_validate_cap("./validate_cap_suidroot", true, true, true, true); - printf("[RUN]\tRoot +ia, suidnonroot => ip\n"); + ksft_print_msg("[RUN]\tRoot +ia, suidnonroot => ip\n"); if (fork_wait()) exec_other_validate_cap("./validate_cap_suidnonroot", false, true, true, false); - printf("[RUN]\tRoot +ia, sgidroot => eipa\n"); + ksft_print_msg("[RUN]\tRoot +ia, sgidroot => eipa\n"); if (fork_wait()) exec_other_validate_cap("./validate_cap_sgidroot", true, true, true, true); if (fork_wait()) { - printf("[RUN]\tRoot, gid != 0, +ia, sgidroot => eip\n"); + ksft_print_msg( + "[RUN]\tRoot, gid != 0, +ia, sgidroot => eip\n"); if (setresgid(1, 1, 1) != 0) - err(1, "setresgid"); + ksft_exit_fail_msg("setresgid - %s\n", + strerror(errno)); exec_other_validate_cap("./validate_cap_sgidroot", true, true, true, false); } - printf("[RUN]\tRoot +ia, sgidnonroot => eip\n"); + ksft_print_msg("[RUN]\tRoot +ia, sgidnonroot => eip\n"); if (fork_wait()) exec_other_validate_cap("./validate_cap_sgidnonroot", true, true, true, false); } else { - printf("[RUN]\tNon-root +ia, sgidnonroot => i\n"); - exec_other_validate_cap("./validate_cap_sgidnonroot", + ksft_print_msg("[RUN]\tNon-root +ia, sgidnonroot => i\n"); + if (fork_wait()) + exec_other_validate_cap("./validate_cap_sgidnonroot", false, false, true, false); if (fork_wait()) { - printf("[RUN]\tNon-root +ia, sgidroot => i\n"); + ksft_print_msg("[RUN]\tNon-root +ia, sgidroot => i\n"); if (setresgid(1, 1, 1) != 0) - err(1, "setresgid"); + ksft_exit_fail_msg("setresgid - %s\n", + strerror(errno)); exec_other_validate_cap("./validate_cap_sgidroot", false, false, true, false); } } done: + ksft_print_cnts(); return nerrs ? 1 : 0; } @@ -400,23 +429,29 @@ int main(int argc, char **argv) { char *tmp1, *tmp2, *our_path; + ksft_print_header(); + /* Find our path */ tmp1 = strdup(argv[0]); if (!tmp1) - err(1, "strdup"); + ksft_exit_fail_msg("strdup - %s\n", strerror(errno)); tmp2 = dirname(tmp1); our_path = strdup(tmp2); if (!our_path) - err(1, "strdup"); + ksft_exit_fail_msg("strdup - %s\n", strerror(errno)); free(tmp1); + mpid = getpid(); + if (fork_wait()) { - printf("[RUN]\t+++ Tests with uid == 0 +++\n"); + ksft_print_msg("[RUN]\t+++ Tests with uid == 0 +++\n"); return do_tests(0, our_path); } + ksft_print_msg("==================================================\n"); + if (fork_wait()) { - printf("[RUN]\t+++ Tests with uid != 0 +++\n"); + ksft_print_msg("[RUN]\t+++ Tests with uid != 0 +++\n"); return do_tests(1, our_path); } diff --git a/tools/testing/selftests/capabilities/validate_cap.c b/tools/testing/selftests/capabilities/validate_cap.c index dd3c45f7b23c..694cd73d4493 100644 --- a/tools/testing/selftests/capabilities/validate_cap.c +++ b/tools/testing/selftests/capabilities/validate_cap.c @@ -1,5 +1,4 @@ #include <cap-ng.h> -#include <err.h> #include <linux/capability.h> #include <stdbool.h> #include <string.h> @@ -7,6 +6,8 @@ #include <sys/prctl.h> #include <sys/auxv.h> +#include "../kselftest.h" + #ifndef PR_CAP_AMBIENT #define PR_CAP_AMBIENT 47 # define PR_CAP_AMBIENT_IS_SET 1 @@ -25,8 +26,10 @@ static bool bool_arg(char **argv, int i) return false; else if (!strcmp(argv[i], "1")) return true; - else - errx(1, "wrong argv[%d]", i); + else { + ksft_exit_fail_msg("wrong argv[%d]\n", i); + return false; + } } int main(int argc, char **argv) @@ -39,7 +42,7 @@ int main(int argc, char **argv) */ if (argc != 5) - errx(1, "wrong argc"); + ksft_exit_fail_msg("wrong argc\n"); #ifdef HAVE_GETAUXVAL if (getauxval(AT_SECURE)) @@ -51,23 +54,26 @@ int main(int argc, char **argv) capng_get_caps_process(); if (capng_have_capability(CAPNG_EFFECTIVE, CAP_NET_BIND_SERVICE) != bool_arg(argv, 1)) { - printf("[FAIL]\tWrong effective state%s\n", atsec); + ksft_print_msg("Wrong effective state%s\n", atsec); return 1; } + if (capng_have_capability(CAPNG_PERMITTED, CAP_NET_BIND_SERVICE) != bool_arg(argv, 2)) { - printf("[FAIL]\tWrong permitted state%s\n", atsec); + ksft_print_msg("Wrong permitted state%s\n", atsec); return 1; } + if (capng_have_capability(CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE) != bool_arg(argv, 3)) { - printf("[FAIL]\tWrong inheritable state%s\n", atsec); + ksft_print_msg("Wrong inheritable state%s\n", atsec); return 1; } if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != bool_arg(argv, 4)) { - printf("[FAIL]\tWrong ambient state%s\n", atsec); + ksft_print_msg("Wrong ambient state%s\n", atsec); return 1; } - printf("[OK]\tCapabilities after execve were correct\n"); + ksft_print_msg("%s: Capabilities after execve were correct\n", + "validate_cap:"); return 0; } diff --git a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh index 98b1d6565f2c..b18b253d7bfb 100755 --- a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh +++ b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh @@ -28,6 +28,12 @@ prerequisite() echo "CPU online/offline summary:" online_cpus=`cat $SYSFS/devices/system/cpu/online` online_max=${online_cpus##*-} + + if [[ "$online_cpus" = "$online_max" ]]; then + echo "$msg: since there is only one cpu: $online_cpus" + exit 0 + fi + echo -e "\t Cpus in online state: $online_cpus" offline_cpus=`cat $SYSFS/devices/system/cpu/offline` @@ -89,8 +95,10 @@ online_cpu_expect_success() if ! online_cpu $cpu; then echo $FUNCNAME $cpu: unexpected fail >&2 + exit 1 elif ! cpu_is_online $cpu; then echo $FUNCNAME $cpu: unexpected offline >&2 + exit 1 fi } @@ -100,8 +108,10 @@ online_cpu_expect_fail() if online_cpu $cpu 2> /dev/null; then echo $FUNCNAME $cpu: unexpected success >&2 + exit 1 elif ! cpu_is_offline $cpu; then echo $FUNCNAME $cpu: unexpected online >&2 + exit 1 fi } @@ -111,8 +121,10 @@ offline_cpu_expect_success() if ! offline_cpu $cpu; then echo $FUNCNAME $cpu: unexpected fail >&2 + exit 1 elif ! cpu_is_offline $cpu; then echo $FUNCNAME $cpu: unexpected offline >&2 + exit 1 fi } @@ -122,8 +134,10 @@ offline_cpu_expect_fail() if offline_cpu $cpu 2> /dev/null; then echo $FUNCNAME $cpu: unexpected success >&2 + exit 1 elif ! cpu_is_online $cpu; then echo $FUNCNAME $cpu: unexpected offline >&2 + exit 1 fi } diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest index 14a03ea1e21d..abc706cf7702 100755 --- a/tools/testing/selftests/ftrace/ftracetest +++ b/tools/testing/selftests/ftrace/ftracetest @@ -8,15 +8,18 @@ # Released under the terms of the GPL v2. usage() { # errno [message] -[ "$2" ] && echo $2 +[ ! -z "$2" ] && echo $2 echo "Usage: ftracetest [options] [testcase(s)] [testcase-directory(s)]" echo " Options:" echo " -h|--help Show help message" echo " -k|--keep Keep passed test logs" echo " -v|--verbose Increase verbosity of test messages" echo " -vv Alias of -v -v (Show all results in stdout)" +echo " -vvv Alias of -v -v -v (Show all commands immediately)" +echo " --fail-unsupported Treat UNSUPPORTED as a failure" echo " -d|--debug Debug mode (trace all shell commands)" echo " -l|--logdir <dir> Save logs on the <dir>" +echo " If <dir> is -, all logs output in console only" exit $1 } @@ -47,7 +50,7 @@ parse_opts() { # opts local OPT_TEST_CASES= local OPT_TEST_DIR= - while [ "$1" ]; do + while [ ! -z "$1" ]; do case "$1" in --help|-h) usage 0 @@ -56,15 +59,20 @@ parse_opts() { # opts KEEP_LOG=1 shift 1 ;; - --verbose|-v|-vv) + --verbose|-v|-vv|-vvv) VERBOSE=$((VERBOSE + 1)) [ $1 = '-vv' ] && VERBOSE=$((VERBOSE + 1)) + [ $1 = '-vvv' ] && VERBOSE=$((VERBOSE + 2)) shift 1 ;; --debug|-d) DEBUG=1 shift 1 ;; + --fail-unsupported) + UNSUPPORTED_RESULT=1 + shift 1 + ;; --logdir|-l) LOG_DIR=$2 shift 2 @@ -88,7 +96,7 @@ parse_opts() { # opts ;; esac done - if [ "$OPT_TEST_CASES" ]; then + if [ ! -z "$OPT_TEST_CASES" ]; then TEST_CASES=$OPT_TEST_CASES fi } @@ -108,6 +116,7 @@ LOG_DIR=$TOP_DIR/logs/`date +%Y%m%d-%H%M%S`/ KEEP_LOG=0 DEBUG=0 VERBOSE=0 +UNSUPPORTED_RESULT=0 # Parse command-line options parse_opts $* @@ -119,14 +128,20 @@ if [ -z "$TRACING_DIR" -o ! -d "$TRACING_DIR" ]; then fi # Preparing logs -LOG_FILE=$LOG_DIR/ftracetest.log -mkdir -p $LOG_DIR || errexit "Failed to make a log directory: $LOG_DIR" -date > $LOG_FILE +if [ "x$LOG_DIR" = "x-" ]; then + LOG_FILE= + date +else + LOG_FILE=$LOG_DIR/ftracetest.log + mkdir -p $LOG_DIR || errexit "Failed to make a log directory: $LOG_DIR" + date > $LOG_FILE +fi + prlog() { # messages - echo "$@" | tee -a $LOG_FILE + [ -z "$LOG_FILE" ] && echo "$@" || echo "$@" | tee -a $LOG_FILE } catlog() { #file - cat $1 | tee -a $LOG_FILE + [ -z "$LOG_FILE" ] && cat $1 || cat $1 | tee -a $LOG_FILE } prlog "=== Ftrace unit tests ===" @@ -187,7 +202,7 @@ eval_result() { # sigval $UNSUPPORTED) prlog " [UNSUPPORTED]" UNSUPPORTED_CASES="$UNSUPPORTED_CASES $CASENO" - return 1 # this is not a bug, but the result should be reported. + return $UNSUPPORTED_RESULT # depends on use case ;; $XFAIL) prlog " [XFAIL]" @@ -247,12 +262,20 @@ __run_test() { # testfile # Run one test case run_test() { # testfile local testname=`basename $1` - local testlog=`mktemp $LOG_DIR/${testname}-log.XXXXXX` + if [ ! -z "$LOG_FILE" ] ; then + local testlog=`mktemp $LOG_DIR/${testname}-log.XXXXXX` + else + local testlog=/proc/self/fd/1 + fi export TMPDIR=`mktemp -d /tmp/ftracetest-dir.XXXXXX` testcase $1 echo "execute$INSTANCE: "$1 > $testlog SIG_RESULT=0 - if [ $VERBOSE -ge 2 ]; then + if [ -z "$LOG_FILE" ]; then + __run_test $1 2>&1 + elif [ $VERBOSE -ge 3 ]; then + __run_test $1 | tee -a $testlog 2>&1 + elif [ $VERBOSE -eq 2 ]; then __run_test $1 2>> $testlog | tee -a $testlog else __run_test $1 >> $testlog 2>&1 @@ -260,9 +283,9 @@ run_test() { # testfile eval_result $SIG_RESULT if [ $? -eq 0 ]; then # Remove test log if the test was done as it was expected. - [ $KEEP_LOG -eq 0 ] && rm $testlog + [ $KEEP_LOG -eq 0 -a ! -z "$LOG_FILE" ] && rm $testlog else - [ $VERBOSE -ge 1 ] && catlog $testlog + [ $VERBOSE -eq 1 -o $VERBOSE -eq 2 ] && catlog $testlog TOTAL_RESULT=1 fi rm -rf $TMPDIR diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc index 2a1cb9908746..a4fd4c851a5b 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc @@ -1,6 +1,8 @@ #!/bin/sh # description: Register/unregister many kprobe events +[ -f kprobe_events ] || exit_unsupported # this is configurable + # ftrace fentry skip size depends on the machine architecture. # Currently HAVE_KPROBES_ON_FTRACE defined on x86 and powerpc64le case `uname -m` in diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile index 7c647f619d63..f0c0369ccb79 100644 --- a/tools/testing/selftests/futex/Makefile +++ b/tools/testing/selftests/futex/Makefile @@ -7,14 +7,17 @@ TEST_PROGS := run.sh include ../lib.mk all: - for DIR in $(SUBDIRS); do \ + @for DIR in $(SUBDIRS); do \ BUILD_TARGET=$(OUTPUT)/$$DIR; \ mkdir $$BUILD_TARGET -p; \ make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ + if [ -e $$DIR/$(TEST_PROGS) ]; then + rsync -a $$DIR/$(TEST_PROGS) $$BUILD_TARGET/; + fi done override define RUN_TESTS - $(OUTPUT)/run.sh + @cd $(OUTPUT); ./run.sh endef override define INSTALL_RULE @@ -33,7 +36,7 @@ override define EMIT_TESTS endef override define CLEAN - for DIR in $(SUBDIRS); do \ + @for DIR in $(SUBDIRS); do \ BUILD_TARGET=$(OUTPUT)/$$DIR; \ mkdir $$BUILD_TARGET -p; \ make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi.c b/tools/testing/selftests/futex/functional/futex_requeue_pi.c index d24ab7421e73..54cd5c414e82 100644 --- a/tools/testing/selftests/futex/functional/futex_requeue_pi.c +++ b/tools/testing/selftests/futex/functional/futex_requeue_pi.c @@ -394,9 +394,11 @@ int main(int argc, char *argv[]) } } - printf("%s: Test requeue functionality\n", basename(argv[0])); - printf("\tArguments: broadcast=%d locked=%d owner=%d timeout=%ldns\n", - broadcast, locked, owner, timeout_ns); + ksft_print_header(); + ksft_print_msg("%s: Test requeue functionality\n", basename(argv[0])); + ksft_print_msg( + "\tArguments: broadcast=%d locked=%d owner=%d timeout=%ldns\n", + broadcast, locked, owner, timeout_ns); /* * FIXME: unit_test is obsolete now that we parse options and the diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c index e0a798ad0d21..08187a16507f 100644 --- a/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c +++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c @@ -78,7 +78,8 @@ int main(int argc, char *argv[]) } } - printf("%s: Detect mismatched requeue_pi operations\n", + ksft_print_header(); + ksft_print_msg("%s: Detect mismatched requeue_pi operations\n", basename(argv[0])); if (pthread_create(&child, NULL, blocking_child, NULL)) { diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c index 982f83577501..f0542a344d95 100644 --- a/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c +++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c @@ -143,9 +143,10 @@ int main(int argc, char *argv[]) } } - printf("%s: Test signal handling during requeue_pi\n", + ksft_print_header(); + ksft_print_msg("%s: Test signal handling during requeue_pi\n", basename(argv[0])); - printf("\tArguments: <none>\n"); + ksft_print_msg("\tArguments: <none>\n"); sa.sa_handler = handle_signal; sigemptyset(&sa.sa_mask); diff --git a/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c index bdc48dc047e5..6216de828093 100644 --- a/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c +++ b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c @@ -97,8 +97,10 @@ int main(int argc, char **argv) } } - printf("%s: Test the futex value of private file mappings in FUTEX_WAIT\n", - basename(argv[0])); + ksft_print_header(); + ksft_print_msg( + "%s: Test the futex value of private file mappings in FUTEX_WAIT\n", + basename(argv[0])); ret = pthread_create(&thr, NULL, thr_futex_wait, NULL); if (ret < 0) { diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c index 6aadd560366e..bab3dfe1787f 100644 --- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c +++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c @@ -68,9 +68,10 @@ int main(int argc, char *argv[]) } } - printf("%s: Block on a futex and wait for timeout\n", + ksft_print_header(); + ksft_print_msg("%s: Block on a futex and wait for timeout\n", basename(argv[0])); - printf("\tArguments: timeout=%ldns\n", timeout_ns); + ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns); /* initialize timeout */ to.tv_sec = 0; diff --git a/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c index d237a8b702f0..26975322545b 100644 --- a/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c +++ b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c @@ -99,7 +99,8 @@ int main(int argc, char **argv) exit(1); } - printf("%s: Test the uninitialized futex value in FUTEX_WAIT\n", + ksft_print_header(); + ksft_print_msg("%s: Test the uninitialized futex value in FUTEX_WAIT\n", basename(argv[0])); diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c index 9a2c56fa7305..da15a63269b4 100644 --- a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c +++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c @@ -64,7 +64,8 @@ int main(int argc, char *argv[]) } } - printf("%s: Test the unexpected futex value in FUTEX_WAIT\n", + ksft_print_header(); + ksft_print_msg("%s: Test the unexpected futex value in FUTEX_WAIT\n", basename(argv[0])); info("Calling futex_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1); diff --git a/tools/testing/selftests/futex/include/logging.h b/tools/testing/selftests/futex/include/logging.h index 4e7944984fbb..01989644e50a 100644 --- a/tools/testing/selftests/futex/include/logging.h +++ b/tools/testing/selftests/futex/include/logging.h @@ -109,22 +109,20 @@ void log_verbosity(int level) */ void print_result(const char *test_name, int ret) { - const char *result = "Unknown return code"; - switch (ret) { case RET_PASS: - ksft_inc_pass_cnt(); - result = PASS; - break; + ksft_test_result_pass("%s\n", test_name); + ksft_print_cnts(); + return; case RET_ERROR: - result = ERROR; - break; + ksft_test_result_error("%s\n", test_name); + ksft_print_cnts(); + return; case RET_FAIL: - ksft_inc_fail_cnt(); - result = FAIL; - break; + ksft_test_result_fail("%s\n", test_name); + ksft_print_cnts(); + return; } - printf("selftests: %s [%s]\n", test_name, result); } /* log level macros */ diff --git a/tools/testing/selftests/intel_pstate/Makefile b/tools/testing/selftests/intel_pstate/Makefile index 849a90ffe8dd..a97e24edde39 100644 --- a/tools/testing/selftests/intel_pstate/Makefile +++ b/tools/testing/selftests/intel_pstate/Makefile @@ -1,7 +1,9 @@ CFLAGS := $(CFLAGS) -Wall -D_GNU_SOURCE LDLIBS := $(LDLIBS) -lm +ifeq (,$(filter $(ARCH),x86)) TEST_GEN_FILES := msr aperf +endif TEST_PROGS := run.sh diff --git a/tools/testing/selftests/intel_pstate/run.sh b/tools/testing/selftests/intel_pstate/run.sh index 7868c106b8b1..d3ab48f91cd6 100755 --- a/tools/testing/selftests/intel_pstate/run.sh +++ b/tools/testing/selftests/intel_pstate/run.sh @@ -29,13 +29,12 @@ EVALUATE_ONLY=0 -max_cpus=$(($(nproc)-1)) +if ! uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ | grep -q x86; then + echo "$0 # Skipped: Test can only run on x86 architectures." + exit 0 +fi -# compile programs -gcc aperf.c -Wall -D_GNU_SOURCE -o aperf -lm -[ $? -ne 0 ] && echo "Problem compiling aperf.c." && exit 1 -gcc -o msr msr.c -lm -[ $? -ne 0 ] && echo "Problem compiling msr.c." && exit 1 +max_cpus=$(($(nproc)-1)) function run_test () { diff --git a/tools/testing/selftests/kcmp/kcmp_test.c b/tools/testing/selftests/kcmp/kcmp_test.c index a5a4da856dfe..73684c4a1ed6 100644 --- a/tools/testing/selftests/kcmp/kcmp_test.c +++ b/tools/testing/selftests/kcmp/kcmp_test.c @@ -8,7 +8,6 @@ #include <errno.h> #include <string.h> #include <fcntl.h> - #include <linux/unistd.h> #include <linux/kcmp.h> @@ -16,20 +15,28 @@ #include <sys/types.h> #include <sys/stat.h> #include <sys/wait.h> +#include <sys/epoll.h> #include "../kselftest.h" -static long sys_kcmp(int pid1, int pid2, int type, int fd1, int fd2) +static long sys_kcmp(int pid1, int pid2, int type, unsigned long fd1, unsigned long fd2) { return syscall(__NR_kcmp, pid1, pid2, type, fd1, fd2); } +static const unsigned int duped_num = 64; + int main(int argc, char **argv) { const char kpath[] = "kcmp-test-file"; + struct kcmp_epoll_slot epoll_slot; + struct epoll_event ev; int pid1, pid2; + int pipefd[2]; int fd1, fd2; + int epollfd; int status; + int fddup; fd1 = open(kpath, O_RDWR | O_CREAT | O_TRUNC, 0644); pid1 = getpid(); @@ -39,6 +46,37 @@ int main(int argc, char **argv) ksft_exit_fail(); } + if (pipe(pipefd)) { + perror("Can't create pipe"); + ksft_exit_fail(); + } + + epollfd = epoll_create1(0); + if (epollfd < 0) { + perror("epoll_create1 failed"); + ksft_exit_fail(); + } + + memset(&ev, 0xff, sizeof(ev)); + ev.events = EPOLLIN | EPOLLOUT; + + if (epoll_ctl(epollfd, EPOLL_CTL_ADD, pipefd[0], &ev)) { + perror("epoll_ctl failed"); + ksft_exit_fail(); + } + + fddup = dup2(pipefd[1], duped_num); + if (fddup < 0) { + perror("dup2 failed"); + ksft_exit_fail(); + } + + if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fddup, &ev)) { + perror("epoll_ctl failed"); + ksft_exit_fail(); + } + close(fddup); + pid2 = fork(); if (pid2 < 0) { perror("fork failed"); @@ -95,6 +133,24 @@ int main(int argc, char **argv) ksft_inc_pass_cnt(); } + /* Compare epoll target */ + epoll_slot = (struct kcmp_epoll_slot) { + .efd = epollfd, + .tfd = duped_num, + .toff = 0, + }; + ret = sys_kcmp(pid1, pid1, KCMP_EPOLL_TFD, pipefd[1], + (unsigned long)(void *)&epoll_slot); + if (ret) { + printf("FAIL: 0 expected but %d returned (%s)\n", + ret, strerror(errno)); + ksft_inc_fail_cnt(); + ret = -1; + } else { + printf("PASS: 0 returned as expected\n"); + ksft_inc_pass_cnt(); + } + ksft_print_cnts(); if (ret) diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index 08e90c2cc5cb..1ae565ed9bf0 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -19,7 +19,8 @@ #define KSFT_FAIL 1 #define KSFT_XFAIL 2 #define KSFT_XPASS 3 -#define KSFT_SKIP 4 +/* Treat skip as pass */ +#define KSFT_SKIP KSFT_PASS /* counters */ struct ksft_count { @@ -28,6 +29,7 @@ struct ksft_count { unsigned int ksft_xfail; unsigned int ksft_xpass; unsigned int ksft_xskip; + unsigned int ksft_error; }; static struct ksft_count ksft_cnt; @@ -36,7 +38,7 @@ static inline int ksft_test_num(void) { return ksft_cnt.ksft_pass + ksft_cnt.ksft_fail + ksft_cnt.ksft_xfail + ksft_cnt.ksft_xpass + - ksft_cnt.ksft_xskip; + ksft_cnt.ksft_xskip + ksft_cnt.ksft_error; } static inline void ksft_inc_pass_cnt(void) { ksft_cnt.ksft_pass++; } @@ -44,6 +46,14 @@ static inline void ksft_inc_fail_cnt(void) { ksft_cnt.ksft_fail++; } static inline void ksft_inc_xfail_cnt(void) { ksft_cnt.ksft_xfail++; } static inline void ksft_inc_xpass_cnt(void) { ksft_cnt.ksft_xpass++; } static inline void ksft_inc_xskip_cnt(void) { ksft_cnt.ksft_xskip++; } +static inline void ksft_inc_error_cnt(void) { ksft_cnt.ksft_error++; } + +static inline int ksft_get_pass_cnt(void) { return ksft_cnt.ksft_pass; } +static inline int ksft_get_fail_cnt(void) { return ksft_cnt.ksft_fail; } +static inline int ksft_get_xfail_cnt(void) { return ksft_cnt.ksft_xfail; } +static inline int ksft_get_xpass_cnt(void) { return ksft_cnt.ksft_xpass; } +static inline int ksft_get_xskip_cnt(void) { return ksft_cnt.ksft_xskip; } +static inline int ksft_get_error_cnt(void) { return ksft_cnt.ksft_error; } static inline void ksft_print_header(void) { @@ -52,6 +62,10 @@ static inline void ksft_print_header(void) static inline void ksft_print_cnts(void) { + printf("Pass %d Fail %d Xfail %d Xpass %d Skip %d Error %d\n", + ksft_cnt.ksft_pass, ksft_cnt.ksft_fail, + ksft_cnt.ksft_xfail, ksft_cnt.ksft_xpass, + ksft_cnt.ksft_xskip, ksft_cnt.ksft_error); printf("1..%d\n", ksft_test_num()); } @@ -101,6 +115,18 @@ static inline void ksft_test_result_skip(const char *msg, ...) va_end(args); } +static inline void ksft_test_result_error(const char *msg, ...) +{ + va_list args; + + ksft_cnt.ksft_error++; + + va_start(args, msg); + printf("not ok %d # error ", ksft_test_num()); + vprintf(msg, args); + va_end(args); +} + static inline int ksft_exit_pass(void) { ksft_print_cnts(); diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index c56f72e07cd7..e81bd28bdd89 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -51,6 +51,9 @@ #define __KSELFTEST_HARNESS_H #define _GNU_SOURCE +#include <asm/types.h> +#include <errno.h> +#include <stdbool.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> @@ -84,6 +87,14 @@ * E.g., #define TH_LOG_ENABLED 1 * * If no definition is provided, logging is enabled by default. + * + * If there is no way to print an error message for the process running the + * test (e.g. not allowed to write to stderr), it is still possible to get the + * ASSERT_* number for which the test failed. This behavior can be enabled by + * writing `_metadata->no_print = true;` before the check sequence that is + * unable to print. When an error occur, instead of printing an error message + * and calling `abort(3)`, the test process call `_exit(2)` with the assert + * number as argument, which is then printed by the parent process. */ #define TH_LOG(fmt, ...) do { \ if (TH_LOG_ENABLED) \ @@ -555,12 +566,18 @@ * return while still providing an optional block to the API consumer. */ #define OPTIONAL_HANDLER(_assert) \ - for (; _metadata->trigger; _metadata->trigger = __bail(_assert)) + for (; _metadata->trigger; _metadata->trigger = \ + __bail(_assert, _metadata->no_print, _metadata->step)) + +#define __INC_STEP(_metadata) \ + if (_metadata->passed && _metadata->step < 255) \ + _metadata->step++; #define __EXPECT(_expected, _seen, _t, _assert) do { \ /* Avoid multiple evaluation of the cases */ \ __typeof__(_expected) __exp = (_expected); \ __typeof__(_seen) __seen = (_seen); \ + if (_assert) __INC_STEP(_metadata); \ if (!(__exp _t __seen)) { \ unsigned long long __exp_print = (uintptr_t)__exp; \ unsigned long long __seen_print = (uintptr_t)__seen; \ @@ -576,6 +593,7 @@ #define __EXPECT_STR(_expected, _seen, _t, _assert) do { \ const char *__exp = (_expected); \ const char *__seen = (_seen); \ + if (_assert) __INC_STEP(_metadata); \ if (!(strcmp(__exp, __seen) _t 0)) { \ __TH_LOG("Expected '%s' %s '%s'.", __exp, #_t, __seen); \ _metadata->passed = 0; \ @@ -590,6 +608,8 @@ struct __test_metadata { int termsig; int passed; int trigger; /* extra handler after the evaluation */ + __u8 step; + bool no_print; /* manual trigger when TH_LOG_STREAM is not available */ struct __test_metadata *prev, *next; }; @@ -634,10 +654,13 @@ static inline void __register_test(struct __test_metadata *t) } } -static inline int __bail(int for_realz) +static inline int __bail(int for_realz, bool no_print, __u8 step) { - if (for_realz) + if (for_realz) { + if (no_print) + _exit(step); abort(); + } return 0; } @@ -655,18 +678,24 @@ void __run_test(struct __test_metadata *t) t->passed = 0; } else if (child_pid == 0) { t->fn(t); - _exit(t->passed); + /* return the step that failed or 0 */ + _exit(t->passed ? 0 : t->step); } else { /* TODO(wad) add timeout support. */ waitpid(child_pid, &status, 0); if (WIFEXITED(status)) { - t->passed = t->termsig == -1 ? WEXITSTATUS(status) : 0; + t->passed = t->termsig == -1 ? !WEXITSTATUS(status) : 0; if (t->termsig != -1) { fprintf(TH_LOG_STREAM, "%s: Test exited normally " "instead of by signal (code: %d)\n", t->name, WEXITSTATUS(status)); + } else if (!t->passed) { + fprintf(TH_LOG_STREAM, + "%s: Test failed at step #%d\n", + t->name, + WEXITSTATUS(status)); } } else if (WIFSIGNALED(status)) { t->passed = 0; diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 959273c3a52e..f65886af7c0c 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -6,20 +6,49 @@ ifeq (0,$(MAKELEVEL)) OUTPUT := $(shell pwd) endif +# The following are built by lib.mk common compile rules. +# TEST_CUSTOM_PROGS should be used by tests that require +# custom build rule and prevent common build rule use. +# TEST_PROGS are for test shell scripts. +# TEST_CUSTOM_PROGS and TEST_PROGS will be run by common run_tests +# and install targets. Common clean doesn't touch them. TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS)) +TEST_GEN_PROGS_EXTENDED := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS_EXTENDED)) TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES)) all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) +.ONESHELL: define RUN_TESTS - @for TEST in $(TEST_GEN_PROGS) $(TEST_PROGS); do \ + @test_num=`echo 0`; + @echo "TAP version 13"; + @for TEST in $(1); do \ BASENAME_TEST=`basename $$TEST`; \ - cd `dirname $$TEST`; (./$$BASENAME_TEST && echo "selftests: $$BASENAME_TEST [PASS]") || echo "selftests: $$BASENAME_TEST [FAIL]"; cd -;\ + test_num=`echo $$test_num+1 | bc`; \ + echo "selftests: $$BASENAME_TEST"; \ + echo "========================================"; \ + if [ ! -x $$TEST ]; then \ + echo "selftests: Warning: file $$BASENAME_TEST is not executable, correct this.";\ + echo "not ok 1..$$test_num selftests: $$BASENAME_TEST [FAIL]"; \ + else \ + cd `dirname $$TEST` > /dev/null; (./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && echo "ok 1..$$test_num selftests: $$BASENAME_TEST [PASS]") || echo "not ok 1..$$test_num selftests: $$BASENAME_TEST [FAIL]"; cd - > /dev/null;\ + fi; \ done; endef run_tests: all - $(RUN_TESTS) +ifneq ($(KBUILD_SRC),) + @if [ "X$(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)" != "X" ]; then + @rsync -aq $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(OUTPUT) + fi + @if [ "X$(TEST_PROGS)" != "X" ]; then + $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(OUTPUT)/$(TEST_PROGS)) + else + $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS)) + fi +else + $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS)) +endif define INSTALL_RULE @if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then \ @@ -27,10 +56,10 @@ define INSTALL_RULE echo "rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/"; \ rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/; \ fi - @if [ "X$(TEST_GEN_PROGS)$(TEST_GEN_PROGS_EXTENDED)$(TEST_GEN_FILES)" != "X" ]; then \ + @if [ "X$(TEST_GEN_PROGS)$(TEST_CUSTOM_PROGS)$(TEST_GEN_PROGS_EXTENDED)$(TEST_GEN_FILES)" != "X" ]; then \ mkdir -p ${INSTALL_PATH}; \ - echo "rsync -a $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/"; \ - rsync -a $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/; \ + echo "rsync -a $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/"; \ + rsync -a $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/; \ fi endef @@ -42,15 +71,20 @@ else endif define EMIT_TESTS - @for TEST in $(TEST_GEN_PROGS) $(TEST_PROGS); do \ + @for TEST in $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS); do \ BASENAME_TEST=`basename $$TEST`; \ - echo "(./$$BASENAME_TEST && echo \"selftests: $$BASENAME_TEST [PASS]\") || echo \"selftests: $$BASENAME_TEST [FAIL]\""; \ + echo "(./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && echo \"selftests: $$BASENAME_TEST [PASS]\") || echo \"selftests: $$BASENAME_TEST [FAIL]\""; \ done; endef emit_tests: $(EMIT_TESTS) +# define if isn't already. It is undefined in make O= case. +ifeq ($(RM),) +RM := rm -f +endif + define CLEAN $(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN) endef @@ -58,6 +92,15 @@ endef clean: $(CLEAN) +# When make O= with kselftest target from main level +# the following aren't defined. +# +ifneq ($(KBUILD_SRC),) +LINK.c = $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) +COMPILE.S = $(CC) $(ASFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c +LINK.S = $(CC) $(ASFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) +endif + $(OUTPUT)/%:%.c $(LINK.c) $^ $(LDLIBS) -o $@ diff --git a/tools/testing/selftests/memfd/Makefile b/tools/testing/selftests/memfd/Makefile index ad8a0897e47f..bc9d02d615da 100644 --- a/tools/testing/selftests/memfd/Makefile +++ b/tools/testing/selftests/memfd/Makefile @@ -3,7 +3,7 @@ CFLAGS += -I../../../../include/uapi/ CFLAGS += -I../../../../include/ CFLAGS += -I../../../../usr/include/ -TEST_PROGS := run_fuse_test.sh +TEST_PROGS := run_tests.sh TEST_GEN_FILES := memfd_test fuse_mnt fuse_test fuse_mnt.o: CFLAGS += $(shell pkg-config fuse --cflags) diff --git a/tools/testing/selftests/memfd/fuse_test.c b/tools/testing/selftests/memfd/fuse_test.c index 67908b18f035..7f3617274bf5 100644 --- a/tools/testing/selftests/memfd/fuse_test.c +++ b/tools/testing/selftests/memfd/fuse_test.c @@ -33,7 +33,7 @@ #include <unistd.h> #define MFD_DEF_SIZE 8192 -#define STACK_SIZE 65535 +#define STACK_SIZE 65536 static int sys_memfd_create(const char *name, unsigned int flags) diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c index 26546892cd54..f94c6d1fb46f 100644 --- a/tools/testing/selftests/memfd/memfd_test.c +++ b/tools/testing/selftests/memfd/memfd_test.c @@ -18,12 +18,48 @@ #include <sys/wait.h> #include <unistd.h> +#define MEMFD_STR "memfd:" +#define SHARED_FT_STR "(shared file-table)" + #define MFD_DEF_SIZE 8192 #define STACK_SIZE 65536 +/* + * Default is not to test hugetlbfs + */ +static int hugetlbfs_test; +static size_t mfd_def_size = MFD_DEF_SIZE; + +/* + * Copied from mlock2-tests.c + */ +static unsigned long default_huge_page_size(void) +{ + unsigned long hps = 0; + char *line = NULL; + size_t linelen = 0; + FILE *f = fopen("/proc/meminfo", "r"); + + if (!f) + return 0; + while (getline(&line, &linelen, f) > 0) { + if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) { + hps <<= 10; + break; + } + } + + free(line); + fclose(f); + return hps; +} + static int sys_memfd_create(const char *name, unsigned int flags) { + if (hugetlbfs_test) + flags |= MFD_HUGETLB; + return syscall(__NR_memfd_create, name, flags); } @@ -150,7 +186,7 @@ static void *mfd_assert_mmap_shared(int fd) void *p; p = mmap(NULL, - MFD_DEF_SIZE, + mfd_def_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, @@ -168,7 +204,7 @@ static void *mfd_assert_mmap_private(int fd) void *p; p = mmap(NULL, - MFD_DEF_SIZE, + mfd_def_size, PROT_READ, MAP_PRIVATE, fd, @@ -223,7 +259,7 @@ static void mfd_assert_read(int fd) /* verify PROT_READ *is* allowed */ p = mmap(NULL, - MFD_DEF_SIZE, + mfd_def_size, PROT_READ, MAP_PRIVATE, fd, @@ -232,11 +268,11 @@ static void mfd_assert_read(int fd) printf("mmap() failed: %m\n"); abort(); } - munmap(p, MFD_DEF_SIZE); + munmap(p, mfd_def_size); /* verify MAP_PRIVATE is *always* allowed (even writable) */ p = mmap(NULL, - MFD_DEF_SIZE, + mfd_def_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, @@ -245,7 +281,7 @@ static void mfd_assert_read(int fd) printf("mmap() failed: %m\n"); abort(); } - munmap(p, MFD_DEF_SIZE); + munmap(p, mfd_def_size); } static void mfd_assert_write(int fd) @@ -254,16 +290,22 @@ static void mfd_assert_write(int fd) void *p; int r; - /* verify write() succeeds */ - l = write(fd, "\0\0\0\0", 4); - if (l != 4) { - printf("write() failed: %m\n"); - abort(); + /* + * huegtlbfs does not support write, but we want to + * verify everything else here. + */ + if (!hugetlbfs_test) { + /* verify write() succeeds */ + l = write(fd, "\0\0\0\0", 4); + if (l != 4) { + printf("write() failed: %m\n"); + abort(); + } } /* verify PROT_READ | PROT_WRITE is allowed */ p = mmap(NULL, - MFD_DEF_SIZE, + mfd_def_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, @@ -273,11 +315,11 @@ static void mfd_assert_write(int fd) abort(); } *(char *)p = 0; - munmap(p, MFD_DEF_SIZE); + munmap(p, mfd_def_size); /* verify PROT_WRITE is allowed */ p = mmap(NULL, - MFD_DEF_SIZE, + mfd_def_size, PROT_WRITE, MAP_SHARED, fd, @@ -287,12 +329,12 @@ static void mfd_assert_write(int fd) abort(); } *(char *)p = 0; - munmap(p, MFD_DEF_SIZE); + munmap(p, mfd_def_size); /* verify PROT_READ with MAP_SHARED is allowed and a following * mprotect(PROT_WRITE) allows writing */ p = mmap(NULL, - MFD_DEF_SIZE, + mfd_def_size, PROT_READ, MAP_SHARED, fd, @@ -302,20 +344,20 @@ static void mfd_assert_write(int fd) abort(); } - r = mprotect(p, MFD_DEF_SIZE, PROT_READ | PROT_WRITE); + r = mprotect(p, mfd_def_size, PROT_READ | PROT_WRITE); if (r < 0) { printf("mprotect() failed: %m\n"); abort(); } *(char *)p = 0; - munmap(p, MFD_DEF_SIZE); + munmap(p, mfd_def_size); /* verify PUNCH_HOLE works */ r = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, - MFD_DEF_SIZE); + mfd_def_size); if (r < 0) { printf("fallocate(PUNCH_HOLE) failed: %m\n"); abort(); @@ -337,7 +379,7 @@ static void mfd_fail_write(int fd) /* verify PROT_READ | PROT_WRITE is not allowed */ p = mmap(NULL, - MFD_DEF_SIZE, + mfd_def_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, @@ -349,7 +391,7 @@ static void mfd_fail_write(int fd) /* verify PROT_WRITE is not allowed */ p = mmap(NULL, - MFD_DEF_SIZE, + mfd_def_size, PROT_WRITE, MAP_SHARED, fd, @@ -362,13 +404,13 @@ static void mfd_fail_write(int fd) /* Verify PROT_READ with MAP_SHARED with a following mprotect is not * allowed. Note that for r/w the kernel already prevents the mmap. */ p = mmap(NULL, - MFD_DEF_SIZE, + mfd_def_size, PROT_READ, MAP_SHARED, fd, 0); if (p != MAP_FAILED) { - r = mprotect(p, MFD_DEF_SIZE, PROT_READ | PROT_WRITE); + r = mprotect(p, mfd_def_size, PROT_READ | PROT_WRITE); if (r >= 0) { printf("mmap()+mprotect() didn't fail as expected\n"); abort(); @@ -379,7 +421,7 @@ static void mfd_fail_write(int fd) r = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, - MFD_DEF_SIZE); + mfd_def_size); if (r >= 0) { printf("fallocate(PUNCH_HOLE) didn't fail as expected\n"); abort(); @@ -390,13 +432,13 @@ static void mfd_assert_shrink(int fd) { int r, fd2; - r = ftruncate(fd, MFD_DEF_SIZE / 2); + r = ftruncate(fd, mfd_def_size / 2); if (r < 0) { printf("ftruncate(SHRINK) failed: %m\n"); abort(); } - mfd_assert_size(fd, MFD_DEF_SIZE / 2); + mfd_assert_size(fd, mfd_def_size / 2); fd2 = mfd_assert_open(fd, O_RDWR | O_CREAT | O_TRUNC, @@ -410,7 +452,7 @@ static void mfd_fail_shrink(int fd) { int r; - r = ftruncate(fd, MFD_DEF_SIZE / 2); + r = ftruncate(fd, mfd_def_size / 2); if (r >= 0) { printf("ftruncate(SHRINK) didn't fail as expected\n"); abort(); @@ -425,31 +467,31 @@ static void mfd_assert_grow(int fd) { int r; - r = ftruncate(fd, MFD_DEF_SIZE * 2); + r = ftruncate(fd, mfd_def_size * 2); if (r < 0) { printf("ftruncate(GROW) failed: %m\n"); abort(); } - mfd_assert_size(fd, MFD_DEF_SIZE * 2); + mfd_assert_size(fd, mfd_def_size * 2); r = fallocate(fd, 0, 0, - MFD_DEF_SIZE * 4); + mfd_def_size * 4); if (r < 0) { printf("fallocate(ALLOC) failed: %m\n"); abort(); } - mfd_assert_size(fd, MFD_DEF_SIZE * 4); + mfd_assert_size(fd, mfd_def_size * 4); } static void mfd_fail_grow(int fd) { int r; - r = ftruncate(fd, MFD_DEF_SIZE * 2); + r = ftruncate(fd, mfd_def_size * 2); if (r >= 0) { printf("ftruncate(GROW) didn't fail as expected\n"); abort(); @@ -458,7 +500,7 @@ static void mfd_fail_grow(int fd) r = fallocate(fd, 0, 0, - MFD_DEF_SIZE * 4); + mfd_def_size * 4); if (r >= 0) { printf("fallocate(ALLOC) didn't fail as expected\n"); abort(); @@ -467,25 +509,37 @@ static void mfd_fail_grow(int fd) static void mfd_assert_grow_write(int fd) { - static char buf[MFD_DEF_SIZE * 8]; + static char *buf; ssize_t l; - l = pwrite(fd, buf, sizeof(buf), 0); - if (l != sizeof(buf)) { + buf = malloc(mfd_def_size * 8); + if (!buf) { + printf("malloc(%d) failed: %m\n", mfd_def_size * 8); + abort(); + } + + l = pwrite(fd, buf, mfd_def_size * 8, 0); + if (l != (mfd_def_size * 8)) { printf("pwrite() failed: %m\n"); abort(); } - mfd_assert_size(fd, MFD_DEF_SIZE * 8); + mfd_assert_size(fd, mfd_def_size * 8); } static void mfd_fail_grow_write(int fd) { - static char buf[MFD_DEF_SIZE * 8]; + static char *buf; ssize_t l; - l = pwrite(fd, buf, sizeof(buf), 0); - if (l == sizeof(buf)) { + buf = malloc(mfd_def_size * 8); + if (!buf) { + printf("malloc(%d) failed: %m\n", mfd_def_size * 8); + abort(); + } + + l = pwrite(fd, buf, mfd_def_size * 8, 0); + if (l == (mfd_def_size * 8)) { printf("pwrite() didn't fail as expected\n"); abort(); } @@ -543,6 +597,8 @@ static void test_create(void) char buf[2048]; int fd; + printf("%s CREATE\n", MEMFD_STR); + /* test NULL name */ mfd_fail_new(NULL, 0); @@ -570,13 +626,18 @@ static void test_create(void) fd = mfd_assert_new("", 0, MFD_CLOEXEC); close(fd); - /* verify MFD_ALLOW_SEALING is allowed */ - fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING); - close(fd); - - /* verify MFD_ALLOW_SEALING | MFD_CLOEXEC is allowed */ - fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING | MFD_CLOEXEC); - close(fd); + if (!hugetlbfs_test) { + /* verify MFD_ALLOW_SEALING is allowed */ + fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING); + close(fd); + + /* verify MFD_ALLOW_SEALING | MFD_CLOEXEC is allowed */ + fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING | MFD_CLOEXEC); + close(fd); + } else { + /* sealing is not supported on hugetlbfs */ + mfd_fail_new("", MFD_ALLOW_SEALING); + } } /* @@ -587,8 +648,14 @@ static void test_basic(void) { int fd; + /* hugetlbfs does not contain sealing support */ + if (hugetlbfs_test) + return; + + printf("%s BASIC\n", MEMFD_STR); + fd = mfd_assert_new("kern_memfd_basic", - MFD_DEF_SIZE, + mfd_def_size, MFD_CLOEXEC | MFD_ALLOW_SEALING); /* add basic seals */ @@ -619,7 +686,7 @@ static void test_basic(void) /* verify sealing does not work without MFD_ALLOW_SEALING */ fd = mfd_assert_new("kern_memfd_basic", - MFD_DEF_SIZE, + mfd_def_size, MFD_CLOEXEC); mfd_assert_has_seals(fd, F_SEAL_SEAL); mfd_fail_add_seals(fd, F_SEAL_SHRINK | @@ -630,6 +697,28 @@ static void test_basic(void) } /* + * hugetlbfs doesn't support seals or write, so just verify grow and shrink + * on a hugetlbfs file created via memfd_create. + */ +static void test_hugetlbfs_grow_shrink(void) +{ + int fd; + + printf("%s HUGETLBFS-GROW-SHRINK\n", MEMFD_STR); + + fd = mfd_assert_new("kern_memfd_seal_write", + mfd_def_size, + MFD_CLOEXEC); + + mfd_assert_read(fd); + mfd_assert_write(fd); + mfd_assert_shrink(fd); + mfd_assert_grow(fd); + + close(fd); +} + +/* * Test SEAL_WRITE * Test whether SEAL_WRITE actually prevents modifications. */ @@ -637,8 +726,17 @@ static void test_seal_write(void) { int fd; + /* + * hugetlbfs does not contain sealing or write support. Just test + * basic grow and shrink via test_hugetlbfs_grow_shrink. + */ + if (hugetlbfs_test) + return test_hugetlbfs_grow_shrink(); + + printf("%s SEAL-WRITE\n", MEMFD_STR); + fd = mfd_assert_new("kern_memfd_seal_write", - MFD_DEF_SIZE, + mfd_def_size, MFD_CLOEXEC | MFD_ALLOW_SEALING); mfd_assert_has_seals(fd, 0); mfd_assert_add_seals(fd, F_SEAL_WRITE); @@ -661,8 +759,14 @@ static void test_seal_shrink(void) { int fd; + /* hugetlbfs does not contain sealing support */ + if (hugetlbfs_test) + return; + + printf("%s SEAL-SHRINK\n", MEMFD_STR); + fd = mfd_assert_new("kern_memfd_seal_shrink", - MFD_DEF_SIZE, + mfd_def_size, MFD_CLOEXEC | MFD_ALLOW_SEALING); mfd_assert_has_seals(fd, 0); mfd_assert_add_seals(fd, F_SEAL_SHRINK); @@ -685,8 +789,14 @@ static void test_seal_grow(void) { int fd; + /* hugetlbfs does not contain sealing support */ + if (hugetlbfs_test) + return; + + printf("%s SEAL-GROW\n", MEMFD_STR); + fd = mfd_assert_new("kern_memfd_seal_grow", - MFD_DEF_SIZE, + mfd_def_size, MFD_CLOEXEC | MFD_ALLOW_SEALING); mfd_assert_has_seals(fd, 0); mfd_assert_add_seals(fd, F_SEAL_GROW); @@ -709,8 +819,14 @@ static void test_seal_resize(void) { int fd; + /* hugetlbfs does not contain sealing support */ + if (hugetlbfs_test) + return; + + printf("%s SEAL-RESIZE\n", MEMFD_STR); + fd = mfd_assert_new("kern_memfd_seal_resize", - MFD_DEF_SIZE, + mfd_def_size, MFD_CLOEXEC | MFD_ALLOW_SEALING); mfd_assert_has_seals(fd, 0); mfd_assert_add_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW); @@ -726,15 +842,52 @@ static void test_seal_resize(void) } /* + * hugetlbfs does not support seals. Basic test to dup the memfd created + * fd and perform some basic operations on it. + */ +static void hugetlbfs_dup(char *b_suffix) +{ + int fd, fd2; + + printf("%s HUGETLBFS-DUP %s\n", MEMFD_STR, b_suffix); + + fd = mfd_assert_new("kern_memfd_share_dup", + mfd_def_size, + MFD_CLOEXEC); + + fd2 = mfd_assert_dup(fd); + + mfd_assert_read(fd); + mfd_assert_write(fd); + + mfd_assert_shrink(fd2); + mfd_assert_grow(fd2); + + close(fd2); + close(fd); +} + +/* * Test sharing via dup() * Test that seals are shared between dupped FDs and they're all equal. */ -static void test_share_dup(void) +static void test_share_dup(char *banner, char *b_suffix) { int fd, fd2; + /* + * hugetlbfs does not contain sealing support. Perform some + * basic testing on dup'ed fd instead via hugetlbfs_dup. + */ + if (hugetlbfs_test) { + hugetlbfs_dup(b_suffix); + return; + } + + printf("%s %s %s\n", MEMFD_STR, banner, b_suffix); + fd = mfd_assert_new("kern_memfd_share_dup", - MFD_DEF_SIZE, + mfd_def_size, MFD_CLOEXEC | MFD_ALLOW_SEALING); mfd_assert_has_seals(fd, 0); @@ -768,13 +921,19 @@ static void test_share_dup(void) * Test sealing with active mmap()s * Modifying seals is only allowed if no other mmap() refs exist. */ -static void test_share_mmap(void) +static void test_share_mmap(char *banner, char *b_suffix) { int fd; void *p; + /* hugetlbfs does not contain sealing support */ + if (hugetlbfs_test) + return; + + printf("%s %s %s\n", MEMFD_STR, banner, b_suffix); + fd = mfd_assert_new("kern_memfd_share_mmap", - MFD_DEF_SIZE, + mfd_def_size, MFD_CLOEXEC | MFD_ALLOW_SEALING); mfd_assert_has_seals(fd, 0); @@ -784,14 +943,40 @@ static void test_share_mmap(void) mfd_assert_has_seals(fd, 0); mfd_assert_add_seals(fd, F_SEAL_SHRINK); mfd_assert_has_seals(fd, F_SEAL_SHRINK); - munmap(p, MFD_DEF_SIZE); + munmap(p, mfd_def_size); /* readable ref allows sealing */ p = mfd_assert_mmap_private(fd); mfd_assert_add_seals(fd, F_SEAL_WRITE); mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK); - munmap(p, MFD_DEF_SIZE); + munmap(p, mfd_def_size); + + close(fd); +} + +/* + * Basic test to make sure we can open the hugetlbfs fd via /proc and + * perform some simple operations on it. + */ +static void hugetlbfs_proc_open(char *b_suffix) +{ + int fd, fd2; + + printf("%s HUGETLBFS-PROC-OPEN %s\n", MEMFD_STR, b_suffix); + fd = mfd_assert_new("kern_memfd_share_open", + mfd_def_size, + MFD_CLOEXEC); + + fd2 = mfd_assert_open(fd, O_RDWR, 0); + + mfd_assert_read(fd); + mfd_assert_write(fd); + + mfd_assert_shrink(fd2); + mfd_assert_grow(fd2); + + close(fd2); close(fd); } @@ -801,12 +986,23 @@ static void test_share_mmap(void) * This is *not* like dup(), but like a real separate open(). Make sure the * semantics are as expected and we correctly check for RDONLY / WRONLY / RDWR. */ -static void test_share_open(void) +static void test_share_open(char *banner, char *b_suffix) { int fd, fd2; + /* + * hugetlbfs does not contain sealing support. So test basic + * functionality of using /proc fd via hugetlbfs_proc_open + */ + if (hugetlbfs_test) { + hugetlbfs_proc_open(b_suffix); + return; + } + + printf("%s %s %s\n", MEMFD_STR, banner, b_suffix); + fd = mfd_assert_new("kern_memfd_share_open", - MFD_DEF_SIZE, + mfd_def_size, MFD_CLOEXEC | MFD_ALLOW_SEALING); mfd_assert_has_seals(fd, 0); @@ -841,13 +1037,19 @@ static void test_share_open(void) * Test sharing via fork() * Test whether seal-modifications work as expected with forked childs. */ -static void test_share_fork(void) +static void test_share_fork(char *banner, char *b_suffix) { int fd; pid_t pid; + /* hugetlbfs does not contain sealing support */ + if (hugetlbfs_test) + return; + + printf("%s %s %s\n", MEMFD_STR, banner, b_suffix); + fd = mfd_assert_new("kern_memfd_share_fork", - MFD_DEF_SIZE, + mfd_def_size, MFD_CLOEXEC | MFD_ALLOW_SEALING); mfd_assert_has_seals(fd, 0); @@ -870,40 +1072,40 @@ int main(int argc, char **argv) { pid_t pid; - printf("memfd: CREATE\n"); + if (argc == 2) { + if (!strcmp(argv[1], "hugetlbfs")) { + unsigned long hpage_size = default_huge_page_size(); + + if (!hpage_size) { + printf("Unable to determine huge page size\n"); + abort(); + } + + hugetlbfs_test = 1; + mfd_def_size = hpage_size * 2; + } + } + test_create(); - printf("memfd: BASIC\n"); test_basic(); - printf("memfd: SEAL-WRITE\n"); test_seal_write(); - printf("memfd: SEAL-SHRINK\n"); test_seal_shrink(); - printf("memfd: SEAL-GROW\n"); test_seal_grow(); - printf("memfd: SEAL-RESIZE\n"); test_seal_resize(); - printf("memfd: SHARE-DUP\n"); - test_share_dup(); - printf("memfd: SHARE-MMAP\n"); - test_share_mmap(); - printf("memfd: SHARE-OPEN\n"); - test_share_open(); - printf("memfd: SHARE-FORK\n"); - test_share_fork(); + test_share_dup("SHARE-DUP", ""); + test_share_mmap("SHARE-MMAP", ""); + test_share_open("SHARE-OPEN", ""); + test_share_fork("SHARE-FORK", ""); /* Run test-suite in a multi-threaded environment with a shared * file-table. */ pid = spawn_idle_thread(CLONE_FILES | CLONE_FS | CLONE_VM); - printf("memfd: SHARE-DUP (shared file-table)\n"); - test_share_dup(); - printf("memfd: SHARE-MMAP (shared file-table)\n"); - test_share_mmap(); - printf("memfd: SHARE-OPEN (shared file-table)\n"); - test_share_open(); - printf("memfd: SHARE-FORK (shared file-table)\n"); - test_share_fork(); + test_share_dup("SHARE-DUP", SHARED_FT_STR); + test_share_mmap("SHARE-MMAP", SHARED_FT_STR); + test_share_open("SHARE-OPEN", SHARED_FT_STR); + test_share_fork("SHARE-FORK", SHARED_FT_STR); join_idle_thread(pid); printf("memfd: DONE\n"); diff --git a/tools/testing/selftests/memfd/run_tests.sh b/tools/testing/selftests/memfd/run_tests.sh new file mode 100755 index 000000000000..daabb350697c --- /dev/null +++ b/tools/testing/selftests/memfd/run_tests.sh @@ -0,0 +1,69 @@ +#!/bin/bash +# please run as root + +# +# Normal tests requiring no special resources +# +./run_fuse_test.sh +./memfd_test + +# +# To test memfd_create with hugetlbfs, there needs to be hpages_test +# huge pages free. Attempt to allocate enough pages to test. +# +hpages_test=8 + +# +# Get count of free huge pages from /proc/meminfo +# +while read name size unit; do + if [ "$name" = "HugePages_Free:" ]; then + freepgs=$size + fi +done < /proc/meminfo + +# +# If not enough free huge pages for test, attempt to increase +# +if [ -n "$freepgs" ] && [ $freepgs -lt $hpages_test ]; then + nr_hugepgs=`cat /proc/sys/vm/nr_hugepages` + hpages_needed=`expr $hpages_test - $freepgs` + + echo 3 > /proc/sys/vm/drop_caches + echo $(( $hpages_needed + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages + if [ $? -ne 0 ]; then + echo "Please run this test as root" + exit 1 + fi + while read name size unit; do + if [ "$name" = "HugePages_Free:" ]; then + freepgs=$size + fi + done < /proc/meminfo +fi + +# +# If still not enough huge pages available, exit. But, give back any huge +# pages potentially allocated above. +# +if [ $freepgs -lt $hpages_test ]; then + # nr_hugepgs non-zero only if we attempted to increase + if [ -n "$nr_hugepgs" ]; then + echo $nr_hugepgs > /proc/sys/vm/nr_hugepages + fi + printf "Not enough huge pages available (%d < %d)\n" \ + $freepgs $needpgs + exit 1 +fi + +# +# Run the hugetlbfs test +# +./memfd_test hugetlbfs + +# +# Give back any huge pages allocated for the test +# +if [ -n "$nr_hugepgs" ]; then + echo $nr_hugepgs > /proc/sys/vm/nr_hugepages +fi diff --git a/tools/testing/selftests/mqueue/Makefile b/tools/testing/selftests/mqueue/Makefile index 79a664aeb8d7..152823b6cb21 100644 --- a/tools/testing/selftests/mqueue/Makefile +++ b/tools/testing/selftests/mqueue/Makefile @@ -5,8 +5,8 @@ TEST_GEN_PROGS := mq_open_tests mq_perf_tests include ../lib.mk override define RUN_TESTS - @./mq_open_tests /test1 || echo "selftests: mq_open_tests [FAIL]" - @./mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]" + @$(OUTPUT)/mq_open_tests /test1 || echo "selftests: mq_open_tests [FAIL]" + @$(OUTPUT)/mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]" endef override define EMIT_TESTS diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index afe109e5508a..c612d6e38c62 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -1,3 +1,4 @@ +msg_zerocopy socket psock_fanout psock_tpacket @@ -5,3 +6,4 @@ reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa reuseport_dualstack +reuseaddr_conflict diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index f6c9dbf478f8..d86bca991f45 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -3,11 +3,11 @@ CFLAGS = -Wall -Wl,--no-as-needed -O2 -g CFLAGS += -I../../../../usr/include/ -TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh +TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh TEST_GEN_FILES = socket -TEST_GEN_FILES += psock_fanout psock_tpacket -TEST_GEN_FILES += reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa -TEST_GEN_FILES += reuseport_dualstack +TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy +TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa +TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict include ../lib.mk diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c new file mode 100644 index 000000000000..3ab6ec403905 --- /dev/null +++ b/tools/testing/selftests/net/msg_zerocopy.c @@ -0,0 +1,697 @@ +/* Evaluate MSG_ZEROCOPY + * + * Send traffic between two processes over one of the supported + * protocols and modes: + * + * PF_INET/PF_INET6 + * - SOCK_STREAM + * - SOCK_DGRAM + * - SOCK_DGRAM with UDP_CORK + * - SOCK_RAW + * - SOCK_RAW with IP_HDRINCL + * + * PF_PACKET + * - SOCK_DGRAM + * - SOCK_RAW + * + * Start this program on two connected hosts, one in send mode and + * the other with option '-r' to put it in receiver mode. + * + * If zerocopy mode ('-z') is enabled, the sender will verify that + * the kernel queues completions on the error queue for all zerocopy + * transfers. + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <error.h> +#include <errno.h> +#include <limits.h> +#include <linux/errqueue.h> +#include <linux/if_packet.h> +#include <linux/ipv6.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <net/ethernet.h> +#include <net/if.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/tcp.h> +#include <netinet/udp.h> +#include <poll.h> +#include <sched.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#ifndef SO_EE_ORIGIN_ZEROCOPY +#define SO_EE_ORIGIN_ZEROCOPY 5 +#endif + +#ifndef SO_ZEROCOPY +#define SO_ZEROCOPY 60 +#endif + +#ifndef SO_EE_CODE_ZEROCOPY_COPIED +#define SO_EE_CODE_ZEROCOPY_COPIED 1 +#endif + +#ifndef MSG_ZEROCOPY +#define MSG_ZEROCOPY 0x4000000 +#endif + +static int cfg_cork; +static bool cfg_cork_mixed; +static int cfg_cpu = -1; /* default: pin to last cpu */ +static int cfg_family = PF_UNSPEC; +static int cfg_ifindex = 1; +static int cfg_payload_len; +static int cfg_port = 8000; +static bool cfg_rx; +static int cfg_runtime_ms = 4200; +static int cfg_verbose; +static int cfg_waittime_ms = 500; +static bool cfg_zerocopy; + +static socklen_t cfg_alen; +static struct sockaddr_storage cfg_dst_addr; +static struct sockaddr_storage cfg_src_addr; + +static char payload[IP_MAXPACKET]; +static long packets, bytes, completions, expected_completions; +static int zerocopied = -1; +static uint32_t next_completion; + +static unsigned long gettimeofday_ms(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000) + (tv.tv_usec / 1000); +} + +static uint16_t get_ip_csum(const uint16_t *start, int num_words) +{ + unsigned long sum = 0; + int i; + + for (i = 0; i < num_words; i++) + sum += start[i]; + + while (sum >> 16) + sum = (sum & 0xFFFF) + (sum >> 16); + + return ~sum; +} + +static int do_setcpu(int cpu) +{ + cpu_set_t mask; + + CPU_ZERO(&mask); + CPU_SET(cpu, &mask); + if (sched_setaffinity(0, sizeof(mask), &mask)) + error(1, 0, "setaffinity %d", cpu); + + if (cfg_verbose) + fprintf(stderr, "cpu: %u\n", cpu); + + return 0; +} + +static void do_setsockopt(int fd, int level, int optname, int val) +{ + if (setsockopt(fd, level, optname, &val, sizeof(val))) + error(1, errno, "setsockopt %d.%d: %d", level, optname, val); +} + +static int do_poll(int fd, int events) +{ + struct pollfd pfd; + int ret; + + pfd.events = events; + pfd.revents = 0; + pfd.fd = fd; + + ret = poll(&pfd, 1, cfg_waittime_ms); + if (ret == -1) + error(1, errno, "poll"); + + return ret && (pfd.revents & events); +} + +static int do_accept(int fd) +{ + int fda = fd; + + fd = accept(fda, NULL, NULL); + if (fd == -1) + error(1, errno, "accept"); + if (close(fda)) + error(1, errno, "close listen sock"); + + return fd; +} + +static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy) +{ + int ret, len, i, flags; + + len = 0; + for (i = 0; i < msg->msg_iovlen; i++) + len += msg->msg_iov[i].iov_len; + + flags = MSG_DONTWAIT; + if (do_zerocopy) + flags |= MSG_ZEROCOPY; + + ret = sendmsg(fd, msg, flags); + if (ret == -1 && errno == EAGAIN) + return false; + if (ret == -1) + error(1, errno, "send"); + if (cfg_verbose && ret != len) + fprintf(stderr, "send: ret=%u != %u\n", ret, len); + + if (len) { + packets++; + bytes += ret; + if (do_zerocopy && ret) + expected_completions++; + } + + return true; +} + +static void do_sendmsg_corked(int fd, struct msghdr *msg) +{ + bool do_zerocopy = cfg_zerocopy; + int i, payload_len, extra_len; + + /* split up the packet. for non-multiple, make first buffer longer */ + payload_len = cfg_payload_len / cfg_cork; + extra_len = cfg_payload_len - (cfg_cork * payload_len); + + do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 1); + + for (i = 0; i < cfg_cork; i++) { + + /* in mixed-frags mode, alternate zerocopy and copy frags + * start with non-zerocopy, to ensure attach later works + */ + if (cfg_cork_mixed) + do_zerocopy = (i & 1); + + msg->msg_iov[0].iov_len = payload_len + extra_len; + extra_len = 0; + + do_sendmsg(fd, msg, do_zerocopy); + } + + do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0); +} + +static int setup_iph(struct iphdr *iph, uint16_t payload_len) +{ + struct sockaddr_in *daddr = (void *) &cfg_dst_addr; + struct sockaddr_in *saddr = (void *) &cfg_src_addr; + + memset(iph, 0, sizeof(*iph)); + + iph->version = 4; + iph->tos = 0; + iph->ihl = 5; + iph->ttl = 2; + iph->saddr = saddr->sin_addr.s_addr; + iph->daddr = daddr->sin_addr.s_addr; + iph->protocol = IPPROTO_EGP; + iph->tot_len = htons(sizeof(*iph) + payload_len); + iph->check = get_ip_csum((void *) iph, iph->ihl << 1); + + return sizeof(*iph); +} + +static int setup_ip6h(struct ipv6hdr *ip6h, uint16_t payload_len) +{ + struct sockaddr_in6 *daddr = (void *) &cfg_dst_addr; + struct sockaddr_in6 *saddr = (void *) &cfg_src_addr; + + memset(ip6h, 0, sizeof(*ip6h)); + + ip6h->version = 6; + ip6h->payload_len = htons(payload_len); + ip6h->nexthdr = IPPROTO_EGP; + ip6h->hop_limit = 2; + ip6h->saddr = saddr->sin6_addr; + ip6h->daddr = daddr->sin6_addr; + + return sizeof(*ip6h); +} + +static void setup_sockaddr(int domain, const char *str_addr, void *sockaddr) +{ + struct sockaddr_in6 *addr6 = (void *) sockaddr; + struct sockaddr_in *addr4 = (void *) sockaddr; + + switch (domain) { + case PF_INET: + addr4->sin_family = AF_INET; + addr4->sin_port = htons(cfg_port); + if (inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1) + error(1, 0, "ipv4 parse error: %s", str_addr); + break; + case PF_INET6: + addr6->sin6_family = AF_INET6; + addr6->sin6_port = htons(cfg_port); + if (inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1) + error(1, 0, "ipv6 parse error: %s", str_addr); + break; + default: + error(1, 0, "illegal domain"); + } +} + +static int do_setup_tx(int domain, int type, int protocol) +{ + int fd; + + fd = socket(domain, type, protocol); + if (fd == -1) + error(1, errno, "socket t"); + + do_setsockopt(fd, SOL_SOCKET, SO_SNDBUF, 1 << 21); + if (cfg_zerocopy) + do_setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, 1); + + if (domain != PF_PACKET) + if (connect(fd, (void *) &cfg_dst_addr, cfg_alen)) + error(1, errno, "connect"); + + return fd; +} + +static bool do_recv_completion(int fd) +{ + struct sock_extended_err *serr; + struct msghdr msg = {}; + struct cmsghdr *cm; + uint32_t hi, lo, range; + int ret, zerocopy; + char control[100]; + + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + ret = recvmsg(fd, &msg, MSG_ERRQUEUE); + if (ret == -1 && errno == EAGAIN) + return false; + if (ret == -1) + error(1, errno, "recvmsg notification"); + if (msg.msg_flags & MSG_CTRUNC) + error(1, errno, "recvmsg notification: truncated"); + + cm = CMSG_FIRSTHDR(&msg); + if (!cm) + error(1, 0, "cmsg: no cmsg"); + if (!((cm->cmsg_level == SOL_IP && cm->cmsg_type == IP_RECVERR) || + (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_RECVERR) || + (cm->cmsg_level == SOL_PACKET && cm->cmsg_type == PACKET_TX_TIMESTAMP))) + error(1, 0, "serr: wrong type: %d.%d", + cm->cmsg_level, cm->cmsg_type); + + serr = (void *) CMSG_DATA(cm); + if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) + error(1, 0, "serr: wrong origin: %u", serr->ee_origin); + if (serr->ee_errno != 0) + error(1, 0, "serr: wrong error code: %u", serr->ee_errno); + + hi = serr->ee_data; + lo = serr->ee_info; + range = hi - lo + 1; + + /* Detect notification gaps. These should not happen often, if at all. + * Gaps can occur due to drops, reordering and retransmissions. + */ + if (lo != next_completion) + fprintf(stderr, "gap: %u..%u does not append to %u\n", + lo, hi, next_completion); + next_completion = hi + 1; + + zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED); + if (zerocopied == -1) + zerocopied = zerocopy; + else if (zerocopied != zerocopy) { + fprintf(stderr, "serr: inconsistent\n"); + zerocopied = zerocopy; + } + + if (cfg_verbose >= 2) + fprintf(stderr, "completed: %u (h=%u l=%u)\n", + range, hi, lo); + + completions += range; + return true; +} + +/* Read all outstanding messages on the errqueue */ +static void do_recv_completions(int fd) +{ + while (do_recv_completion(fd)) {} +} + +/* Wait for all remaining completions on the errqueue */ +static void do_recv_remaining_completions(int fd) +{ + int64_t tstop = gettimeofday_ms() + cfg_waittime_ms; + + while (completions < expected_completions && + gettimeofday_ms() < tstop) { + if (do_poll(fd, POLLERR)) + do_recv_completions(fd); + } + + if (completions < expected_completions) + fprintf(stderr, "missing notifications: %lu < %lu\n", + completions, expected_completions); +} + +static void do_tx(int domain, int type, int protocol) +{ + struct iovec iov[3] = { {0} }; + struct sockaddr_ll laddr; + struct msghdr msg = {0}; + struct ethhdr eth; + union { + struct ipv6hdr ip6h; + struct iphdr iph; + } nh; + uint64_t tstop; + int fd; + + fd = do_setup_tx(domain, type, protocol); + + if (domain == PF_PACKET) { + uint16_t proto = cfg_family == PF_INET ? ETH_P_IP : ETH_P_IPV6; + + /* sock_raw passes ll header as data */ + if (type == SOCK_RAW) { + memset(eth.h_dest, 0x06, ETH_ALEN); + memset(eth.h_source, 0x02, ETH_ALEN); + eth.h_proto = htons(proto); + iov[0].iov_base = ð + iov[0].iov_len = sizeof(eth); + msg.msg_iovlen++; + } + + /* both sock_raw and sock_dgram expect name */ + memset(&laddr, 0, sizeof(laddr)); + laddr.sll_family = AF_PACKET; + laddr.sll_ifindex = cfg_ifindex; + laddr.sll_protocol = htons(proto); + laddr.sll_halen = ETH_ALEN; + + memset(laddr.sll_addr, 0x06, ETH_ALEN); + + msg.msg_name = &laddr; + msg.msg_namelen = sizeof(laddr); + } + + /* packet and raw sockets with hdrincl must pass network header */ + if (domain == PF_PACKET || protocol == IPPROTO_RAW) { + if (cfg_family == PF_INET) + iov[1].iov_len = setup_iph(&nh.iph, cfg_payload_len); + else + iov[1].iov_len = setup_ip6h(&nh.ip6h, cfg_payload_len); + + iov[1].iov_base = (void *) &nh; + msg.msg_iovlen++; + } + + iov[2].iov_base = payload; + iov[2].iov_len = cfg_payload_len; + msg.msg_iovlen++; + msg.msg_iov = &iov[3 - msg.msg_iovlen]; + + tstop = gettimeofday_ms() + cfg_runtime_ms; + do { + if (cfg_cork) + do_sendmsg_corked(fd, &msg); + else + do_sendmsg(fd, &msg, cfg_zerocopy); + + while (!do_poll(fd, POLLOUT)) { + if (cfg_zerocopy) + do_recv_completions(fd); + } + + } while (gettimeofday_ms() < tstop); + + if (cfg_zerocopy) + do_recv_remaining_completions(fd); + + if (close(fd)) + error(1, errno, "close"); + + fprintf(stderr, "tx=%lu (%lu MB) txc=%lu zc=%c\n", + packets, bytes >> 20, completions, + zerocopied == 1 ? 'y' : 'n'); +} + +static int do_setup_rx(int domain, int type, int protocol) +{ + int fd; + + /* If tx over PF_PACKET, rx over PF_INET(6)/SOCK_RAW, + * to recv the only copy of the packet, not a clone + */ + if (domain == PF_PACKET) + error(1, 0, "Use PF_INET/SOCK_RAW to read"); + + if (type == SOCK_RAW && protocol == IPPROTO_RAW) + error(1, 0, "IPPROTO_RAW: not supported on Rx"); + + fd = socket(domain, type, protocol); + if (fd == -1) + error(1, errno, "socket r"); + + do_setsockopt(fd, SOL_SOCKET, SO_RCVBUF, 1 << 21); + do_setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, 1 << 16); + do_setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, 1); + + if (bind(fd, (void *) &cfg_dst_addr, cfg_alen)) + error(1, errno, "bind"); + + if (type == SOCK_STREAM) { + if (listen(fd, 1)) + error(1, errno, "listen"); + fd = do_accept(fd); + } + + return fd; +} + +/* Flush all outstanding bytes for the tcp receive queue */ +static void do_flush_tcp(int fd) +{ + int ret; + + /* MSG_TRUNC flushes up to len bytes */ + ret = recv(fd, NULL, 1 << 21, MSG_TRUNC | MSG_DONTWAIT); + if (ret == -1 && errno == EAGAIN) + return; + if (ret == -1) + error(1, errno, "flush"); + if (!ret) + return; + + packets++; + bytes += ret; +} + +/* Flush all outstanding datagrams. Verify first few bytes of each. */ +static void do_flush_datagram(int fd, int type) +{ + int ret, off = 0; + char buf[64]; + + /* MSG_TRUNC will return full datagram length */ + ret = recv(fd, buf, sizeof(buf), MSG_DONTWAIT | MSG_TRUNC); + if (ret == -1 && errno == EAGAIN) + return; + + /* raw ipv4 return with header, raw ipv6 without */ + if (cfg_family == PF_INET && type == SOCK_RAW) { + off += sizeof(struct iphdr); + ret -= sizeof(struct iphdr); + } + + if (ret == -1) + error(1, errno, "recv"); + if (ret != cfg_payload_len) + error(1, 0, "recv: ret=%u != %u", ret, cfg_payload_len); + if (ret > sizeof(buf) - off) + ret = sizeof(buf) - off; + if (memcmp(buf + off, payload, ret)) + error(1, 0, "recv: data mismatch"); + + packets++; + bytes += cfg_payload_len; +} + +static void do_rx(int domain, int type, int protocol) +{ + uint64_t tstop; + int fd; + + fd = do_setup_rx(domain, type, protocol); + + tstop = gettimeofday_ms() + cfg_runtime_ms; + do { + if (type == SOCK_STREAM) + do_flush_tcp(fd); + else + do_flush_datagram(fd, type); + + do_poll(fd, POLLIN); + + } while (gettimeofday_ms() < tstop); + + if (close(fd)) + error(1, errno, "close"); + + fprintf(stderr, "rx=%lu (%lu MB)\n", packets, bytes >> 20); +} + +static void do_test(int domain, int type, int protocol) +{ + int i; + + if (cfg_cork && (domain == PF_PACKET || type != SOCK_DGRAM)) + error(1, 0, "can only cork udp sockets"); + + do_setcpu(cfg_cpu); + + for (i = 0; i < IP_MAXPACKET; i++) + payload[i] = 'a' + (i % 26); + + if (cfg_rx) + do_rx(domain, type, protocol); + else + do_tx(domain, type, protocol); +} + +static void usage(const char *filepath) +{ + error(1, 0, "Usage: %s [options] <test>", filepath); +} + +static void parse_opts(int argc, char **argv) +{ + const int max_payload_len = sizeof(payload) - + sizeof(struct ipv6hdr) - + sizeof(struct tcphdr) - + 40 /* max tcp options */; + int c; + + cfg_payload_len = max_payload_len; + + while ((c = getopt(argc, argv, "46c:C:D:i:mp:rs:S:t:vz")) != -1) { + switch (c) { + case '4': + if (cfg_family != PF_UNSPEC) + error(1, 0, "Pass one of -4 or -6"); + cfg_family = PF_INET; + cfg_alen = sizeof(struct sockaddr_in); + break; + case '6': + if (cfg_family != PF_UNSPEC) + error(1, 0, "Pass one of -4 or -6"); + cfg_family = PF_INET6; + cfg_alen = sizeof(struct sockaddr_in6); + break; + case 'c': + cfg_cork = strtol(optarg, NULL, 0); + break; + case 'C': + cfg_cpu = strtol(optarg, NULL, 0); + break; + case 'D': + setup_sockaddr(cfg_family, optarg, &cfg_dst_addr); + break; + case 'i': + cfg_ifindex = if_nametoindex(optarg); + if (cfg_ifindex == 0) + error(1, errno, "invalid iface: %s", optarg); + break; + case 'm': + cfg_cork_mixed = true; + break; + case 'p': + cfg_port = htons(strtoul(optarg, NULL, 0)); + break; + case 'r': + cfg_rx = true; + break; + case 's': + cfg_payload_len = strtoul(optarg, NULL, 0); + break; + case 'S': + setup_sockaddr(cfg_family, optarg, &cfg_src_addr); + break; + case 't': + cfg_runtime_ms = 200 + strtoul(optarg, NULL, 10) * 1000; + break; + case 'v': + cfg_verbose++; + break; + case 'z': + cfg_zerocopy = true; + break; + } + } + + if (cfg_payload_len > max_payload_len) + error(1, 0, "-s: payload exceeds max (%d)", max_payload_len); + if (cfg_cork_mixed && (!cfg_zerocopy || !cfg_cork)) + error(1, 0, "-m: cork_mixed requires corking and zerocopy"); + + if (optind != argc - 1) + usage(argv[0]); +} + +int main(int argc, char **argv) +{ + const char *cfg_test; + + parse_opts(argc, argv); + + cfg_test = argv[argc - 1]; + + if (!strcmp(cfg_test, "packet")) + do_test(PF_PACKET, SOCK_RAW, 0); + else if (!strcmp(cfg_test, "packet_dgram")) + do_test(PF_PACKET, SOCK_DGRAM, 0); + else if (!strcmp(cfg_test, "raw")) + do_test(cfg_family, SOCK_RAW, IPPROTO_EGP); + else if (!strcmp(cfg_test, "raw_hdrincl")) + do_test(cfg_family, SOCK_RAW, IPPROTO_RAW); + else if (!strcmp(cfg_test, "tcp")) + do_test(cfg_family, SOCK_STREAM, 0); + else if (!strcmp(cfg_test, "udp")) + do_test(cfg_family, SOCK_DGRAM, 0); + else + error(1, 0, "unknown cfg_test %s", cfg_test); + + return 0; +} diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh new file mode 100755 index 000000000000..d571d213418d --- /dev/null +++ b/tools/testing/selftests/net/msg_zerocopy.sh @@ -0,0 +1,112 @@ +#!/bin/bash +# +# Send data between two processes across namespaces +# Run twice: once without and once with zerocopy + +set -e + +readonly DEV="veth0" +readonly DEV_MTU=65535 +readonly BIN="./msg_zerocopy" + +readonly RAND="$(mktemp -u XXXXXX)" +readonly NSPREFIX="ns-${RAND}" +readonly NS1="${NSPREFIX}1" +readonly NS2="${NSPREFIX}2" + +readonly SADDR4='192.168.1.1' +readonly DADDR4='192.168.1.2' +readonly SADDR6='fd::1' +readonly DADDR6='fd::2' + +readonly path_sysctl_mem="net.core.optmem_max" + +# Argument parsing +if [[ "$#" -lt "2" ]]; then + echo "Usage: $0 [4|6] [tcp|udp|raw|raw_hdrincl|packet|packet_dgram] <args>" + exit 1 +fi + +readonly IP="$1" +shift +readonly TXMODE="$1" +shift +readonly EXTRA_ARGS="$@" + +# Argument parsing: configure addresses +if [[ "${IP}" == "4" ]]; then + readonly SADDR="${SADDR4}" + readonly DADDR="${DADDR4}" +elif [[ "${IP}" == "6" ]]; then + readonly SADDR="${SADDR6}" + readonly DADDR="${DADDR6}" +else + echo "Invalid IP version ${IP}" + exit 1 +fi + +# Argument parsing: select receive mode +# +# This differs from send mode for +# - packet: use raw recv, because packet receives skb clones +# - raw_hdrinc: use raw recv, because hdrincl is a tx-only option +case "${TXMODE}" in +'packet' | 'packet_dgram' | 'raw_hdrincl') + RXMODE='raw' + ;; +*) + RXMODE="${TXMODE}" + ;; +esac + +# Start of state changes: install cleanup handler +save_sysctl_mem="$(sysctl -n ${path_sysctl_mem})" + +cleanup() { + ip netns del "${NS2}" + ip netns del "${NS1}" + sysctl -w -q "${path_sysctl_mem}=${save_sysctl_mem}" +} + +trap cleanup EXIT + +# Configure system settings +sysctl -w -q "${path_sysctl_mem}=1000000" + +# Create virtual ethernet pair between network namespaces +ip netns add "${NS1}" +ip netns add "${NS2}" + +ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \ + peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}" + +# Bring the devices up +ip -netns "${NS1}" link set "${DEV}" up +ip -netns "${NS2}" link set "${DEV}" up + +# Set fixed MAC addresses on the devices +ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02 +ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06 + +# Add fixed IP addresses to the devices +ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}" +ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}" +ip -netns "${NS1}" addr add fd::1/64 dev "${DEV}" nodad +ip -netns "${NS2}" addr add fd::2/64 dev "${DEV}" nodad + +# Optionally disable sg or csum offload to test edge cases +# ip netns exec "${NS1}" ethtool -K "${DEV}" sg off + +do_test() { + local readonly ARGS="$1" + + echo "ipv${IP} ${TXMODE} ${ARGS}" + ip netns exec "${NS2}" "${BIN}" "-${IP}" -i "${DEV}" -t 2 -C 2 -S "${SADDR}" -D "${DADDR}" ${ARGS} -r "${RXMODE}" & + sleep 0.2 + ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 -S "${SADDR}" -D "${DADDR}" ${ARGS} "${TXMODE}" + wait +} + +do_test "${EXTRA_ARGS}" +do_test "-z ${EXTRA_ARGS}" +echo ok diff --git a/tools/testing/selftests/net/netdevice.sh b/tools/testing/selftests/net/netdevice.sh index 4e00568d70c2..90cb903c3381 100755 --- a/tools/testing/selftests/net/netdevice.sh +++ b/tools/testing/selftests/net/netdevice.sh @@ -178,7 +178,7 @@ if [ "$(id -u)" -ne 0 ];then exit 0 fi -ip -Version 2>/dev/null >/dev/null +ip link show 2>/dev/null >/dev/null if [ $? -ne 0 ];then echo "SKIP: Could not run test without the ip tool" exit 0 diff --git a/tools/testing/selftests/net/reuseaddr_conflict.c b/tools/testing/selftests/net/reuseaddr_conflict.c new file mode 100644 index 000000000000..7c5b12664b03 --- /dev/null +++ b/tools/testing/selftests/net/reuseaddr_conflict.c @@ -0,0 +1,114 @@ +/* + * Test for the regression introduced by + * + * b9470c27607b ("inet: kill smallest_size and smallest_port") + * + * If we open an ipv4 socket on a port with reuseaddr we shouldn't reset the tb + * when we open the ipv6 conterpart, which is what was happening previously. + */ +#include <errno.h> +#include <error.h> +#include <arpa/inet.h> +#include <netinet/in.h> +#include <stdbool.h> +#include <stdio.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <unistd.h> + +#define PORT 9999 + +int open_port(int ipv6, int any) +{ + int fd = -1; + int reuseaddr = 1; + int v6only = 1; + int addrlen; + int ret = -1; + struct sockaddr *addr; + int family = ipv6 ? AF_INET6 : AF_INET; + + struct sockaddr_in6 addr6 = { + .sin6_family = AF_INET6, + .sin6_port = htons(PORT), + .sin6_addr = in6addr_any + }; + struct sockaddr_in addr4 = { + .sin_family = AF_INET, + .sin_port = htons(PORT), + .sin_addr.s_addr = any ? htonl(INADDR_ANY) : inet_addr("127.0.0.1"), + }; + + + if (ipv6) { + addr = (struct sockaddr*)&addr6; + addrlen = sizeof(addr6); + } else { + addr = (struct sockaddr*)&addr4; + addrlen = sizeof(addr4); + } + + if ((fd = socket(family, SOCK_STREAM, IPPROTO_TCP)) < 0) { + perror("socket"); + goto out; + } + + if (ipv6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, (void*)&v6only, + sizeof(v6only)) < 0) { + perror("setsockopt IPV6_V6ONLY"); + goto out; + } + + if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &reuseaddr, + sizeof(reuseaddr)) < 0) { + perror("setsockopt SO_REUSEADDR"); + goto out; + } + + if (bind(fd, addr, addrlen) < 0) { + perror("bind"); + goto out; + } + + if (any) + return fd; + + if (listen(fd, 1) < 0) { + perror("listen"); + goto out; + } + return fd; +out: + close(fd); + return ret; +} + +int main(void) +{ + int listenfd; + int fd1, fd2; + + fprintf(stderr, "Opening 127.0.0.1:%d\n", PORT); + listenfd = open_port(0, 0); + if (listenfd < 0) + error(1, errno, "Couldn't open listen socket"); + fprintf(stderr, "Opening INADDR_ANY:%d\n", PORT); + fd1 = open_port(0, 1); + if (fd1 >= 0) + error(1, 0, "Was allowed to create an ipv4 reuseport on a already bound non-reuseport socket"); + fprintf(stderr, "Opening in6addr_any:%d\n", PORT); + fd1 = open_port(1, 1); + if (fd1 < 0) + error(1, errno, "Couldn't open ipv6 reuseport"); + fprintf(stderr, "Opening INADDR_ANY:%d\n", PORT); + fd2 = open_port(0, 1); + if (fd2 >= 0) + error(1, 0, "Was allowed to create an ipv4 reuseport on a already bound non-reuseport socket"); + close(fd1); + fprintf(stderr, "Opening INADDR_ANY:%d after closing ipv6 socket\n", PORT); + fd1 = open_port(0, 1); + if (fd1 >= 0) + error(1, 0, "Was allowed to create an ipv4 reuseport on an already bound non-reuseport socket with no ipv6"); + fprintf(stderr, "Success"); + return 0; +} diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh new file mode 100755 index 000000000000..57b5ff576240 --- /dev/null +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -0,0 +1,272 @@ +#!/bin/sh +# +# This test is for checking rtnetlink callpaths, and get as much coverage as possible. +# +# set -e + +devdummy="test-dummy0" +ret=0 + +# set global exit status, but never reset nonzero one. +check_err() +{ + if [ $ret -eq 0 ]; then + ret=$1 + fi +} + +kci_add_dummy() +{ + ip link add name "$devdummy" type dummy + check_err $? + ip link set "$devdummy" up + check_err $? +} + +kci_del_dummy() +{ + ip link del dev "$devdummy" + check_err $? +} + +# add a bridge with vlans on top +kci_test_bridge() +{ + devbr="test-br0" + vlandev="testbr-vlan1" + + ret=0 + ip link add name "$devbr" type bridge + check_err $? + + ip link set dev "$devdummy" master "$devbr" + check_err $? + + ip link set "$devbr" up + check_err $? + + ip link add link "$devbr" name "$vlandev" type vlan id 1 + check_err $? + ip addr add dev "$vlandev" 10.200.7.23/30 + check_err $? + ip -6 addr add dev "$vlandev" dead:42::1234/64 + check_err $? + ip -d link > /dev/null + check_err $? + ip r s t all > /dev/null + check_err $? + ip -6 addr del dev "$vlandev" dead:42::1234/64 + check_err $? + + ip link del dev "$vlandev" + check_err $? + ip link del dev "$devbr" + check_err $? + + if [ $ret -ne 0 ];then + echo "FAIL: bridge setup" + return 1 + fi + echo "PASS: bridge setup" + +} + +kci_test_gre() +{ + gredev=neta + rem=10.42.42.1 + loc=10.0.0.1 + + ret=0 + ip tunnel add $gredev mode gre remote $rem local $loc ttl 1 + check_err $? + ip link set $gredev up + check_err $? + ip addr add 10.23.7.10 dev $gredev + check_err $? + ip route add 10.23.8.0/30 dev $gredev + check_err $? + ip addr add dev "$devdummy" 10.23.7.11/24 + check_err $? + ip link > /dev/null + check_err $? + ip addr > /dev/null + check_err $? + ip addr del dev "$devdummy" 10.23.7.11/24 + check_err $? + + ip link del $gredev + check_err $? + + if [ $ret -ne 0 ];then + echo "FAIL: gre tunnel endpoint" + return 1 + fi + echo "PASS: gre tunnel endpoint" +} + +# tc uses rtnetlink too, for full tc testing +# please see tools/testing/selftests/tc-testing. +kci_test_tc() +{ + dev=lo + ret=0 + + tc qdisc add dev "$dev" root handle 1: htb + check_err $? + tc class add dev "$dev" parent 1: classid 1:10 htb rate 1mbit + check_err $? + tc filter add dev "$dev" parent 1:0 prio 5 handle ffe: protocol ip u32 divisor 256 + check_err $? + tc filter add dev "$dev" parent 1:0 prio 5 handle ffd: protocol ip u32 divisor 256 + check_err $? + tc filter add dev "$dev" parent 1:0 prio 5 handle ffc: protocol ip u32 divisor 256 + check_err $? + tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32 ht ffe:2: match ip src 10.0.0.3 flowid 1:10 + check_err $? + tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:2 u32 ht ffe:2: match ip src 10.0.0.2 flowid 1:10 + check_err $? + tc filter show dev "$dev" parent 1:0 > /dev/null + check_err $? + tc filter del dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32 + check_err $? + tc filter show dev "$dev" parent 1:0 > /dev/null + check_err $? + tc qdisc del dev "$dev" root handle 1: htb + check_err $? + + if [ $ret -ne 0 ];then + echo "FAIL: tc htb hierarchy" + return 1 + fi + echo "PASS: tc htb hierarchy" + +} + +kci_test_polrouting() +{ + ret=0 + ip rule add fwmark 1 lookup 100 + check_err $? + ip route add local 0.0.0.0/0 dev lo table 100 + check_err $? + ip r s t all > /dev/null + check_err $? + ip rule del fwmark 1 lookup 100 + check_err $? + ip route del local 0.0.0.0/0 dev lo table 100 + check_err $? + + if [ $ret -ne 0 ];then + echo "FAIL: policy route test" + return 1 + fi + echo "PASS: policy routing" +} + +kci_test_route_get() +{ + ret=0 + + ip route get 127.0.0.1 > /dev/null + check_err $? + ip route get 127.0.0.1 dev "$devdummy" > /dev/null + check_err $? + ip route get ::1 > /dev/null + check_err $? + ip route get fe80::1 dev "$devdummy" > /dev/null + check_err $? + ip route get 127.0.0.1 from 127.0.0.1 oif lo tos 0x1 mark 0x1 > /dev/null + check_err $? + ip route get ::1 from ::1 iif lo oif lo tos 0x1 mark 0x1 > /dev/null + check_err $? + ip addr add dev "$devdummy" 10.23.7.11/24 + check_err $? + ip route get 10.23.7.11 from 10.23.7.12 iif "$devdummy" > /dev/null + check_err $? + ip addr del dev "$devdummy" 10.23.7.11/24 + check_err $? + + if [ $ret -ne 0 ];then + echo "FAIL: route get" + return 1 + fi + + echo "PASS: route get" +} + +kci_test_addrlabel() +{ + ret=0 + + ip addrlabel add prefix dead::/64 dev lo label 1 + check_err $? + + ip addrlabel list |grep -q "prefix dead::/64 dev lo label 1" + check_err $? + + ip addrlabel del prefix dead::/64 dev lo label 1 2> /dev/null + check_err $? + + ip addrlabel add prefix dead::/64 label 1 2> /dev/null + check_err $? + + ip addrlabel del prefix dead::/64 label 1 2> /dev/null + check_err $? + + # concurrent add/delete + for i in $(seq 1 1000); do + ip addrlabel add prefix 1c3::/64 label 12345 2>/dev/null + done & + + for i in $(seq 1 1000); do + ip addrlabel del prefix 1c3::/64 label 12345 2>/dev/null + done + + wait + + ip addrlabel del prefix 1c3::/64 label 12345 2>/dev/null + + if [ $ret -ne 0 ];then + echo "FAIL: ipv6 addrlabel" + return 1 + fi + + echo "PASS: ipv6 addrlabel" +} + +kci_test_rtnl() +{ + kci_add_dummy + if [ $ret -ne 0 ];then + echo "FAIL: cannot add dummy interface" + return 1 + fi + + kci_test_polrouting + kci_test_route_get + kci_test_tc + kci_test_gre + kci_test_bridge + kci_test_addrlabel + + kci_del_dummy +} + +#check for needed privileges +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit 0 +fi + +for x in ip tc;do + $x -Version 2>/dev/null >/dev/null + if [ $? -ne 0 ];then + echo "SKIP: Could not run test without the $x tool" + exit 0 + fi +done + +kci_test_rtnl + +exit $ret diff --git a/tools/testing/selftests/networking/timestamping/.gitignore b/tools/testing/selftests/networking/timestamping/.gitignore index 9e69e982fb38..d9355035e746 100644 --- a/tools/testing/selftests/networking/timestamping/.gitignore +++ b/tools/testing/selftests/networking/timestamping/.gitignore @@ -1,3 +1,4 @@ timestamping +rxtimestamp txtimestamp hwtstamp_config diff --git a/tools/testing/selftests/networking/timestamping/Makefile b/tools/testing/selftests/networking/timestamping/Makefile index ccbb9edbbbb9..92fb8ee917c5 100644 --- a/tools/testing/selftests/networking/timestamping/Makefile +++ b/tools/testing/selftests/networking/timestamping/Makefile @@ -1,4 +1,6 @@ -TEST_PROGS := hwtstamp_config timestamping txtimestamp +CFLAGS += -I../../../../../usr/include + +TEST_PROGS := hwtstamp_config rxtimestamp timestamping txtimestamp all: $(TEST_PROGS) diff --git a/tools/testing/selftests/networking/timestamping/rxtimestamp.c b/tools/testing/selftests/networking/timestamping/rxtimestamp.c new file mode 100644 index 000000000000..dd4162fc0419 --- /dev/null +++ b/tools/testing/selftests/networking/timestamping/rxtimestamp.c @@ -0,0 +1,389 @@ +#include <errno.h> +#include <error.h> +#include <getopt.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <sys/time.h> +#include <sys/socket.h> +#include <sys/select.h> +#include <sys/ioctl.h> +#include <arpa/inet.h> +#include <net/if.h> + +#include <asm/types.h> +#include <linux/net_tstamp.h> +#include <linux/errqueue.h> + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +struct options { + int so_timestamp; + int so_timestampns; + int so_timestamping; +}; + +struct tstamps { + bool tstamp; + bool tstampns; + bool swtstamp; + bool hwtstamp; +}; + +struct socket_type { + char *friendly_name; + int type; + int protocol; + bool enabled; +}; + +struct test_case { + struct options sockopt; + struct tstamps expected; + bool enabled; +}; + +struct sof_flag { + int mask; + char *name; +}; + +static struct sof_flag sof_flags[] = { +#define SOF_FLAG(f) { f, #f } + SOF_FLAG(SOF_TIMESTAMPING_SOFTWARE), + SOF_FLAG(SOF_TIMESTAMPING_RX_SOFTWARE), + SOF_FLAG(SOF_TIMESTAMPING_RX_HARDWARE), +}; + +static struct socket_type socket_types[] = { + { "ip", SOCK_RAW, IPPROTO_EGP }, + { "udp", SOCK_DGRAM, IPPROTO_UDP }, + { "tcp", SOCK_STREAM, IPPROTO_TCP }, +}; + +static struct test_case test_cases[] = { + { {}, {} }, + { + { so_timestamp: 1 }, + { tstamp: true } + }, + { + { so_timestampns: 1 }, + { tstampns: true } + }, + { + { so_timestamp: 1, so_timestampns: 1 }, + { tstampns: true } + }, + { + { so_timestamping: SOF_TIMESTAMPING_RX_SOFTWARE }, + {} + }, + { + /* Loopback device does not support hw timestamps. */ + { so_timestamping: SOF_TIMESTAMPING_RX_HARDWARE }, + {} + }, + { + { so_timestamping: SOF_TIMESTAMPING_SOFTWARE }, + {} + }, + { + { so_timestamping: SOF_TIMESTAMPING_RX_SOFTWARE + | SOF_TIMESTAMPING_RX_HARDWARE }, + {} + }, + { + { so_timestamping: SOF_TIMESTAMPING_SOFTWARE + | SOF_TIMESTAMPING_RX_SOFTWARE }, + { swtstamp: true } + }, + { + { so_timestamp: 1, so_timestamping: SOF_TIMESTAMPING_SOFTWARE + | SOF_TIMESTAMPING_RX_SOFTWARE }, + { tstamp: true, swtstamp: true } + }, +}; + +static struct option long_options[] = { + { "list_tests", no_argument, 0, 'l' }, + { "test_num", required_argument, 0, 'n' }, + { "op_size", required_argument, 0, 's' }, + { "tcp", no_argument, 0, 't' }, + { "udp", no_argument, 0, 'u' }, + { "ip", no_argument, 0, 'i' }, +}; + +static int next_port = 19999; +static int op_size = 10 * 1024; + +void print_test_case(struct test_case *t) +{ + int f = 0; + + printf("sockopts {"); + if (t->sockopt.so_timestamp) + printf(" SO_TIMESTAMP "); + if (t->sockopt.so_timestampns) + printf(" SO_TIMESTAMPNS "); + if (t->sockopt.so_timestamping) { + printf(" SO_TIMESTAMPING: {"); + for (f = 0; f < ARRAY_SIZE(sof_flags); f++) + if (t->sockopt.so_timestamping & sof_flags[f].mask) + printf(" %s |", sof_flags[f].name); + printf("}"); + } + printf("} expected cmsgs: {"); + if (t->expected.tstamp) + printf(" SCM_TIMESTAMP "); + if (t->expected.tstampns) + printf(" SCM_TIMESTAMPNS "); + if (t->expected.swtstamp || t->expected.hwtstamp) { + printf(" SCM_TIMESTAMPING {"); + if (t->expected.swtstamp) + printf("0"); + if (t->expected.swtstamp && t->expected.hwtstamp) + printf(","); + if (t->expected.hwtstamp) + printf("2"); + printf("}"); + } + printf("}\n"); +} + +void do_send(int src) +{ + int r; + char *buf = malloc(op_size); + + memset(buf, 'z', op_size); + r = write(src, buf, op_size); + if (r < 0) + error(1, errno, "Failed to sendmsg"); + + free(buf); +} + +bool do_recv(int rcv, int read_size, struct tstamps expected) +{ + const int CMSG_SIZE = 1024; + + struct scm_timestamping *ts; + struct tstamps actual = {}; + char cmsg_buf[CMSG_SIZE]; + struct iovec recv_iov; + struct cmsghdr *cmsg; + bool failed = false; + struct msghdr hdr; + int flags = 0; + int r; + + memset(&hdr, 0, sizeof(hdr)); + hdr.msg_iov = &recv_iov; + hdr.msg_iovlen = 1; + recv_iov.iov_base = malloc(read_size); + recv_iov.iov_len = read_size; + + hdr.msg_control = cmsg_buf; + hdr.msg_controllen = sizeof(cmsg_buf); + + r = recvmsg(rcv, &hdr, flags); + if (r < 0) + error(1, errno, "Failed to recvmsg"); + if (r != read_size) + error(1, 0, "Only received %d bytes of payload.", r); + + if (hdr.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) + error(1, 0, "Message was truncated."); + + for (cmsg = CMSG_FIRSTHDR(&hdr); cmsg != NULL; + cmsg = CMSG_NXTHDR(&hdr, cmsg)) { + if (cmsg->cmsg_level != SOL_SOCKET) + error(1, 0, "Unexpected cmsg_level %d", + cmsg->cmsg_level); + switch (cmsg->cmsg_type) { + case SCM_TIMESTAMP: + actual.tstamp = true; + break; + case SCM_TIMESTAMPNS: + actual.tstampns = true; + break; + case SCM_TIMESTAMPING: + ts = (struct scm_timestamping *)CMSG_DATA(cmsg); + actual.swtstamp = !!ts->ts[0].tv_sec; + if (ts->ts[1].tv_sec != 0) + error(0, 0, "ts[1] should not be set."); + actual.hwtstamp = !!ts->ts[2].tv_sec; + break; + default: + error(1, 0, "Unexpected cmsg_type %d", cmsg->cmsg_type); + } + } + +#define VALIDATE(field) \ + do { \ + if (expected.field != actual.field) { \ + if (expected.field) \ + error(0, 0, "Expected " #field " to be set."); \ + else \ + error(0, 0, \ + "Expected " #field " to not be set."); \ + failed = true; \ + } \ + } while (0) + + VALIDATE(tstamp); + VALIDATE(tstampns); + VALIDATE(swtstamp); + VALIDATE(hwtstamp); +#undef VALIDATE + + free(recv_iov.iov_base); + + return failed; +} + +void config_so_flags(int rcv, struct options o) +{ + int on = 1; + + if (setsockopt(rcv, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) < 0) + error(1, errno, "Failed to enable SO_REUSEADDR"); + + if (o.so_timestamp && + setsockopt(rcv, SOL_SOCKET, SO_TIMESTAMP, + &o.so_timestamp, sizeof(o.so_timestamp)) < 0) + error(1, errno, "Failed to enable SO_TIMESTAMP"); + + if (o.so_timestampns && + setsockopt(rcv, SOL_SOCKET, SO_TIMESTAMPNS, + &o.so_timestampns, sizeof(o.so_timestampns)) < 0) + error(1, errno, "Failed to enable SO_TIMESTAMPNS"); + + if (o.so_timestamping && + setsockopt(rcv, SOL_SOCKET, SO_TIMESTAMPING, + &o.so_timestamping, sizeof(o.so_timestamping)) < 0) + error(1, errno, "Failed to set SO_TIMESTAMPING"); +} + +bool run_test_case(struct socket_type s, struct test_case t) +{ + int port = (s.type == SOCK_RAW) ? 0 : next_port++; + int read_size = op_size; + struct sockaddr_in addr; + bool failed = false; + int src, dst, rcv; + + src = socket(AF_INET, s.type, s.protocol); + if (src < 0) + error(1, errno, "Failed to open src socket"); + + dst = socket(AF_INET, s.type, s.protocol); + if (dst < 0) + error(1, errno, "Failed to open dst socket"); + + memset(&addr, 0, sizeof(addr)); + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + addr.sin_port = htons(port); + + if (bind(dst, (struct sockaddr *)&addr, sizeof(addr)) < 0) + error(1, errno, "Failed to bind to port %d", port); + + if (s.type == SOCK_STREAM && (listen(dst, 1) < 0)) + error(1, errno, "Failed to listen"); + + if (connect(src, (struct sockaddr *)&addr, sizeof(addr)) < 0) + error(1, errno, "Failed to connect"); + + if (s.type == SOCK_STREAM) { + rcv = accept(dst, NULL, NULL); + if (rcv < 0) + error(1, errno, "Failed to accept"); + close(dst); + } else { + rcv = dst; + } + + config_so_flags(rcv, t.sockopt); + usleep(20000); /* setsockopt for SO_TIMESTAMPING is asynchronous */ + do_send(src); + + if (s.type == SOCK_RAW) + read_size += 20; /* for IP header */ + failed = do_recv(rcv, read_size, t.expected); + + close(rcv); + close(src); + + return failed; +} + +int main(int argc, char **argv) +{ + bool all_protocols = true; + bool all_tests = true; + int arg_index = 0; + int failures = 0; + int s, t; + char opt; + + while ((opt = getopt_long(argc, argv, "", long_options, + &arg_index)) != -1) { + switch (opt) { + case 'l': + for (t = 0; t < ARRAY_SIZE(test_cases); t++) { + printf("%d\t", t); + print_test_case(&test_cases[t]); + } + return 0; + case 'n': + t = atoi(optarg); + if (t >= ARRAY_SIZE(test_cases)) + error(1, 0, "Invalid test case: %d", t); + all_tests = false; + test_cases[t].enabled = true; + break; + case 's': + op_size = atoi(optarg); + break; + case 't': + all_protocols = false; + socket_types[2].enabled = true; + break; + case 'u': + all_protocols = false; + socket_types[1].enabled = true; + break; + case 'i': + all_protocols = false; + socket_types[0].enabled = true; + break; + default: + error(1, 0, "Failed to parse parameters."); + } + } + + for (s = 0; s < ARRAY_SIZE(socket_types); s++) { + if (!all_protocols && !socket_types[s].enabled) + continue; + + printf("Testing %s...\n", socket_types[s].friendly_name); + for (t = 0; t < ARRAY_SIZE(test_cases); t++) { + if (!all_tests && !test_cases[t].enabled) + continue; + + printf("Starting testcase %d...\n", t); + if (run_test_case(socket_types[s], test_cases[t])) { + failures++; + printf("FAILURE in test case "); + print_test_case(&test_cases[t]); + } + } + } + if (!failures) + printf("PASSED.\n"); + return failures; +} diff --git a/tools/testing/selftests/nsfs/config b/tools/testing/selftests/nsfs/config new file mode 100644 index 000000000000..598d0a225fc9 --- /dev/null +++ b/tools/testing/selftests/nsfs/config @@ -0,0 +1,3 @@ +CONFIG_USER_NS=y +CONFIG_UTS_NS=y +CONFIG_PID_NS=y diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile index fe6bc60dfc60..8932263e5a74 100644 --- a/tools/testing/selftests/powerpc/ptrace/Makefile +++ b/tools/testing/selftests/powerpc/ptrace/Makefile @@ -6,7 +6,7 @@ include ../../lib.mk all: $(TEST_PROGS) -CFLAGS += -m64 -I../../../../../usr/include -I../tm -mhtm +CFLAGS += -m64 -I../../../../../usr/include -I../tm -mhtm -fno-pie $(TEST_PROGS): ../harness.c ../utils.c ../lib/reg.S ptrace.h diff --git a/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S b/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S index ef7c971abb67..bceb53f57573 100644 --- a/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S +++ b/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S @@ -8,7 +8,7 @@ message: .section ".toc" .balign 8 pattern: - .llong 0x5555AAAA5555AAAA + .8byte 0x5555AAAA5555AAAA .text FUNC_START(_start) diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index 958c11c14acd..7bfcd454fb2a 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -15,6 +15,7 @@ $(OUTPUT)/tm-syscall: tm-syscall-asm.S $(OUTPUT)/tm-syscall: CFLAGS += -I../../../../../usr/include $(OUTPUT)/tm-tmspr: CFLAGS += -pthread $(OUTPUT)/tm-vmx-unavail: CFLAGS += -pthread -m64 +$(OUTPUT)/tm-resched-dscr: ../pmu/lib.o SIGNAL_CONTEXT_CHK_TESTS := $(patsubst %,$(OUTPUT)/%,$(SIGNAL_CONTEXT_CHK_TESTS)) $(SIGNAL_CONTEXT_CHK_TESTS): tm-signal.S diff --git a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c index e79ccd6aada1..a7ac2e4c60d9 100644 --- a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c +++ b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c @@ -30,6 +30,7 @@ #include "utils.h" #include "tm.h" +#include "../pmu/lib.h" #define SPRN_DSCR 0x03 @@ -75,8 +76,6 @@ int test_body(void) ); assert(rv); /* make sure the transaction aborted */ if ((texasr >> 56) != TM_CAUSE_RESCHED) { - putchar('.'); - fflush(stdout); continue; } if (dscr2 != dscr1) { @@ -89,7 +88,12 @@ int test_body(void) } } -int main(void) +static int tm_resched_dscr(void) { - return test_harness(test_body, "tm_resched_dscr"); + return eat_cpu(test_body); +} + +int main(int argc, const char *argv[]) +{ + return test_harness(tm_resched_dscr, "tm_resched_dscr"); } diff --git a/tools/testing/selftests/pstore/.gitignore b/tools/testing/selftests/pstore/.gitignore new file mode 100644 index 000000000000..5a4a26e5464b --- /dev/null +++ b/tools/testing/selftests/pstore/.gitignore @@ -0,0 +1,2 @@ +logs +*uuid diff --git a/tools/testing/selftests/ptp/Makefile b/tools/testing/selftests/ptp/Makefile index 83dd42b2129e..d4064c742c26 100644 --- a/tools/testing/selftests/ptp/Makefile +++ b/tools/testing/selftests/ptp/Makefile @@ -1,3 +1,4 @@ +CFLAGS += -I../../../../usr/include/ TEST_PROGS := testptp LDLIBS += -lrt all: $(TEST_PROGS) diff --git a/tools/testing/selftests/seccomp/Makefile b/tools/testing/selftests/seccomp/Makefile index aeb0c805f3ca..553d870b4ca9 100644 --- a/tools/testing/selftests/seccomp/Makefile +++ b/tools/testing/selftests/seccomp/Makefile @@ -1,8 +1,16 @@ -TEST_GEN_PROGS := seccomp_bpf -CFLAGS += -Wl,-no-as-needed -Wall -LDFLAGS += -lpthread +all: include ../lib.mk -$(TEST_GEN_PROGS): seccomp_bpf.c ../kselftest_harness.h - $(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ +.PHONY: all clean + +BINARIES := seccomp_bpf seccomp_benchmark +CFLAGS += -Wl,-no-as-needed -Wall + +seccomp_bpf: seccomp_bpf.c ../kselftest_harness.h + $(CC) $(CFLAGS) $(LDFLAGS) -lpthread $< -o $@ + +TEST_PROGS += $(BINARIES) +EXTRA_CLEAN := $(BINARIES) + +all: $(BINARIES) diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c new file mode 100644 index 000000000000..5838c8697ec3 --- /dev/null +++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c @@ -0,0 +1,99 @@ +/* + * Strictly speaking, this is not a test. But it can report during test + * runs so relative performace can be measured. + */ +#define _GNU_SOURCE +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <unistd.h> +#include <linux/filter.h> +#include <linux/seccomp.h> +#include <sys/prctl.h> +#include <sys/syscall.h> +#include <sys/types.h> + +#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) + +unsigned long long timing(clockid_t clk_id, unsigned long long samples) +{ + pid_t pid, ret; + unsigned long long i; + struct timespec start, finish; + + pid = getpid(); + assert(clock_gettime(clk_id, &start) == 0); + for (i = 0; i < samples; i++) { + ret = syscall(__NR_getpid); + assert(pid == ret); + } + assert(clock_gettime(clk_id, &finish) == 0); + + i = finish.tv_sec - start.tv_sec; + i *= 1000000000; + i += finish.tv_nsec - start.tv_nsec; + + printf("%lu.%09lu - %lu.%09lu = %llu\n", + finish.tv_sec, finish.tv_nsec, + start.tv_sec, start.tv_nsec, + i); + + return i; +} + +unsigned long long calibrate(void) +{ + unsigned long long i; + + printf("Calibrating reasonable sample size...\n"); + + for (i = 5; ; i++) { + unsigned long long samples = 1 << i; + + /* Find something that takes more than 5 seconds to run. */ + if (timing(CLOCK_REALTIME, samples) / 1000000000ULL > 5) + return samples; + } +} + +int main(int argc, char *argv[]) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + unsigned long long samples; + unsigned long long native, filtered; + + if (argc > 1) + samples = strtoull(argv[1], NULL, 0); + else + samples = calibrate(); + + printf("Benchmarking %llu samples...\n", samples); + + native = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; + printf("getpid native: %llu ns\n", native); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + assert(ret == 0); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + assert(ret == 0); + + filtered = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; + printf("getpid RET_ALLOW: %llu ns\n", filtered); + + printf("Estimated seccomp overhead per syscall: %llu ns\n", + filtered - native); + + if (filtered == native) + printf("Trying running again with more samples.\n"); + + return 0; +} diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 73f5ea6778ce..24dbf634e2dd 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -6,10 +6,18 @@ */ #include <sys/types.h> -#include <asm/siginfo.h> -#define __have_siginfo_t 1 -#define __have_sigval_t 1 -#define __have_sigevent_t 1 + +/* + * glibc 2.26 and later have SIGSYS in siginfo_t. Before that, + * we need to use the kernel's siginfo.h file and trick glibc + * into accepting it. + */ +#if !__GLIBC_PREREQ(2, 26) +# include <asm/siginfo.h> +# define __have_siginfo_t 1 +# define __have_sigval_t 1 +# define __have_sigevent_t 1 +#endif #include <errno.h> #include <linux/filter.h> @@ -68,17 +76,7 @@ #define SECCOMP_MODE_FILTER 2 #endif -#ifndef SECCOMP_RET_KILL -#define SECCOMP_RET_KILL 0x00000000U /* kill the task immediately */ -#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ -#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ -#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ -#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ - -/* Masks for the return value sections. */ -#define SECCOMP_RET_ACTION 0x7fff0000U -#define SECCOMP_RET_DATA 0x0000ffffU - +#ifndef SECCOMP_RET_ALLOW struct seccomp_data { int nr; __u32 arch; @@ -87,6 +85,70 @@ struct seccomp_data { }; #endif +#ifndef SECCOMP_RET_KILL_PROCESS +#define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */ +#define SECCOMP_RET_KILL_THREAD 0x00000000U /* kill the thread */ +#endif +#ifndef SECCOMP_RET_KILL +#define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD +#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ +#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ +#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ +#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ +#endif +#ifndef SECCOMP_RET_LOG +#define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */ +#endif + +#ifndef __NR_seccomp +# if defined(__i386__) +# define __NR_seccomp 354 +# elif defined(__x86_64__) +# define __NR_seccomp 317 +# elif defined(__arm__) +# define __NR_seccomp 383 +# elif defined(__aarch64__) +# define __NR_seccomp 277 +# elif defined(__hppa__) +# define __NR_seccomp 338 +# elif defined(__powerpc__) +# define __NR_seccomp 358 +# elif defined(__s390__) +# define __NR_seccomp 348 +# else +# warning "seccomp syscall number unknown for this architecture" +# define __NR_seccomp 0xffff +# endif +#endif + +#ifndef SECCOMP_SET_MODE_STRICT +#define SECCOMP_SET_MODE_STRICT 0 +#endif + +#ifndef SECCOMP_SET_MODE_FILTER +#define SECCOMP_SET_MODE_FILTER 1 +#endif + +#ifndef SECCOMP_GET_ACTION_AVAIL +#define SECCOMP_GET_ACTION_AVAIL 2 +#endif + +#ifndef SECCOMP_FILTER_FLAG_TSYNC +#define SECCOMP_FILTER_FLAG_TSYNC 1 +#endif + +#ifndef SECCOMP_FILTER_FLAG_LOG +#define SECCOMP_FILTER_FLAG_LOG 2 +#endif + +#ifndef seccomp +int seccomp(unsigned int op, unsigned int flags, void *args) +{ + errno = 0; + return syscall(__NR_seccomp, op, flags, args); +} +#endif + #if __BYTE_ORDER == __LITTLE_ENDIAN #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n])) #elif __BYTE_ORDER == __BIG_ENDIAN @@ -107,7 +169,7 @@ TEST(mode_strict_support) ASSERT_EQ(0, ret) { TH_LOG("Kernel does not support CONFIG_SECCOMP"); } - syscall(__NR_exit, 1); + syscall(__NR_exit, 0); } TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL) @@ -136,7 +198,7 @@ TEST(no_new_privs_support) } } -/* Tests kernel support by checking for a copy_from_user() fault on * NULL. */ +/* Tests kernel support by checking for a copy_from_user() fault on NULL. */ TEST(mode_filter_support) { long ret; @@ -342,6 +404,28 @@ TEST(empty_prog) EXPECT_EQ(EINVAL, errno); } +TEST(log_all) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + pid_t parent = getppid(); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); + + /* getppid() should succeed and be logged (no check for logging) */ + EXPECT_EQ(parent, syscall(__NR_getppid)); +} + TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS) { struct sock_filter filter[] = { @@ -520,6 +604,117 @@ TEST_SIGNAL(KILL_one_arg_six, SIGSYS) close(fd); } +/* This is a thread task to die via seccomp filter violation. */ +void *kill_thread(void *data) +{ + bool die = (bool)data; + + if (die) { + prctl(PR_GET_SECCOMP, 0, 0, 0, 0); + return (void *)SIBLING_EXIT_FAILURE; + } + + return (void *)SIBLING_EXIT_UNKILLED; +} + +/* Prepare a thread that will kill itself or both of us. */ +void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process) +{ + pthread_t thread; + void *status; + /* Kill only when calling __NR_prctl. */ + struct sock_filter filter_thread[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog_thread = { + .len = (unsigned short)ARRAY_SIZE(filter_thread), + .filter = filter_thread, + }; + struct sock_filter filter_process[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog_process = { + .len = (unsigned short)ARRAY_SIZE(filter_process), + .filter = filter_process, + }; + + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, + kill_process ? &prog_process : &prog_thread)); + + /* + * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS + * flag cannot be downgraded by a new filter. + */ + ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread)); + + /* Start a thread that will exit immediately. */ + ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false)); + ASSERT_EQ(0, pthread_join(thread, &status)); + ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status); + + /* Start a thread that will die immediately. */ + ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true)); + ASSERT_EQ(0, pthread_join(thread, &status)); + ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status); + + /* + * If we get here, only the spawned thread died. Let the parent know + * the whole process didn't die (i.e. this thread, the spawner, + * stayed running). + */ + exit(42); +} + +TEST(KILL_thread) +{ + int status; + pid_t child_pid; + + child_pid = fork(); + ASSERT_LE(0, child_pid); + if (child_pid == 0) { + kill_thread_or_group(_metadata, false); + _exit(38); + } + + ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); + + /* If only the thread was killed, we'll see exit 42. */ + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(42, WEXITSTATUS(status)); +} + +TEST(KILL_process) +{ + int status; + pid_t child_pid; + + child_pid = fork(); + ASSERT_LE(0, child_pid); + if (child_pid == 0) { + kill_thread_or_group(_metadata, true); + _exit(38); + } + + ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); + + /* If the entire process was killed, we'll see SIGSYS. */ + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_EQ(SIGSYS, WTERMSIG(status)); +} + /* TODO(wad) add 64-bit versus 32-bit arg tests. */ TEST(arg_out_of_range) { @@ -541,26 +736,30 @@ TEST(arg_out_of_range) EXPECT_EQ(EINVAL, errno); } +#define ERRNO_FILTER(name, errno) \ + struct sock_filter _read_filter_##name[] = { \ + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, \ + offsetof(struct seccomp_data, nr)), \ + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), \ + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | errno), \ + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), \ + }; \ + struct sock_fprog prog_##name = { \ + .len = (unsigned short)ARRAY_SIZE(_read_filter_##name), \ + .filter = _read_filter_##name, \ + } + +/* Make sure basic errno values are correctly passed through a filter. */ TEST(ERRNO_valid) { - struct sock_filter filter[] = { - BPF_STMT(BPF_LD|BPF_W|BPF_ABS, - offsetof(struct seccomp_data, nr)), - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), - BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | E2BIG), - BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), - }; - struct sock_fprog prog = { - .len = (unsigned short)ARRAY_SIZE(filter), - .filter = filter, - }; + ERRNO_FILTER(valid, E2BIG); long ret; pid_t parent = getppid(); ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); ASSERT_EQ(0, ret); - ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid); ASSERT_EQ(0, ret); EXPECT_EQ(parent, syscall(__NR_getppid)); @@ -568,26 +767,17 @@ TEST(ERRNO_valid) EXPECT_EQ(E2BIG, errno); } +/* Make sure an errno of zero is correctly handled by the arch code. */ TEST(ERRNO_zero) { - struct sock_filter filter[] = { - BPF_STMT(BPF_LD|BPF_W|BPF_ABS, - offsetof(struct seccomp_data, nr)), - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), - BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 0), - BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), - }; - struct sock_fprog prog = { - .len = (unsigned short)ARRAY_SIZE(filter), - .filter = filter, - }; + ERRNO_FILTER(zero, 0); long ret; pid_t parent = getppid(); ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); ASSERT_EQ(0, ret); - ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero); ASSERT_EQ(0, ret); EXPECT_EQ(parent, syscall(__NR_getppid)); @@ -595,26 +785,21 @@ TEST(ERRNO_zero) EXPECT_EQ(0, read(0, NULL, 0)); } +/* + * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller. + * This tests that the errno value gets capped correctly, fixed by + * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO"). + */ TEST(ERRNO_capped) { - struct sock_filter filter[] = { - BPF_STMT(BPF_LD|BPF_W|BPF_ABS, - offsetof(struct seccomp_data, nr)), - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), - BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 4096), - BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), - }; - struct sock_fprog prog = { - .len = (unsigned short)ARRAY_SIZE(filter), - .filter = filter, - }; + ERRNO_FILTER(capped, 4096); long ret; pid_t parent = getppid(); ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); ASSERT_EQ(0, ret); - ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped); ASSERT_EQ(0, ret); EXPECT_EQ(parent, syscall(__NR_getppid)); @@ -622,6 +807,37 @@ TEST(ERRNO_capped) EXPECT_EQ(4095, errno); } +/* + * Filters are processed in reverse order: last applied is executed first. + * Since only the SECCOMP_RET_ACTION mask is tested for return values, the + * SECCOMP_RET_DATA mask results will follow the most recently applied + * matching filter return (and not the lowest or highest value). + */ +TEST(ERRNO_order) +{ + ERRNO_FILTER(first, 11); + ERRNO_FILTER(second, 13); + ERRNO_FILTER(third, 12); + long ret; + pid_t parent = getppid(); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third); + ASSERT_EQ(0, ret); + + EXPECT_EQ(parent, syscall(__NR_getppid)); + EXPECT_EQ(-1, read(0, NULL, 0)); + EXPECT_EQ(12, errno); +} + FIXTURE_DATA(TRAP) { struct sock_fprog prog; }; @@ -676,7 +892,7 @@ TEST_F_SIGNAL(TRAP, ign, SIGSYS) syscall(__NR_getpid); } -static struct siginfo TRAP_info; +static siginfo_t TRAP_info; static volatile int TRAP_nr; static void TRAP_action(int nr, siginfo_t *info, void *void_context) { @@ -735,6 +951,7 @@ TEST_F(TRAP, handler) FIXTURE_DATA(precedence) { struct sock_fprog allow; + struct sock_fprog log; struct sock_fprog trace; struct sock_fprog error; struct sock_fprog trap; @@ -746,6 +963,13 @@ FIXTURE_SETUP(precedence) struct sock_filter allow_insns[] = { BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), }; + struct sock_filter log_insns[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG), + }; struct sock_filter trace_insns[] = { BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, nr)), @@ -782,6 +1006,7 @@ FIXTURE_SETUP(precedence) memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \ self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns) FILTER_ALLOC(allow); + FILTER_ALLOC(log); FILTER_ALLOC(trace); FILTER_ALLOC(error); FILTER_ALLOC(trap); @@ -792,6 +1017,7 @@ FIXTURE_TEARDOWN(precedence) { #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter) FILTER_FREE(allow); + FILTER_FREE(log); FILTER_FREE(trace); FILTER_FREE(error); FILTER_FREE(trap); @@ -809,6 +1035,8 @@ TEST_F(precedence, allow_ok) ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); @@ -833,6 +1061,8 @@ TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS) ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); @@ -864,6 +1094,8 @@ TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS) ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); @@ -885,6 +1117,8 @@ TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS) ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); @@ -910,6 +1144,8 @@ TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS) ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); @@ -931,6 +1167,8 @@ TEST_F(precedence, errno_is_third) ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); @@ -949,6 +1187,8 @@ TEST_F(precedence, errno_is_third_in_any_order) ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); @@ -971,6 +1211,8 @@ TEST_F(precedence, trace_is_fourth) ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); ASSERT_EQ(0, ret); /* Should work just fine. */ @@ -992,12 +1234,54 @@ TEST_F(precedence, trace_is_fourth_in_any_order) ASSERT_EQ(0, ret); ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); /* Should work just fine. */ EXPECT_EQ(parent, syscall(__NR_getppid)); /* No ptracer */ EXPECT_EQ(-1, syscall(__NR_getpid)); } +TEST_F(precedence, log_is_fifth) +{ + pid_t mypid, parent; + long ret; + + mypid = getpid(); + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* Should also work just fine */ + EXPECT_EQ(mypid, syscall(__NR_getpid)); +} + +TEST_F(precedence, log_is_fifth_in_any_order) +{ + pid_t mypid, parent; + long ret; + + mypid = getpid(); + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* Should also work just fine */ + EXPECT_EQ(mypid, syscall(__NR_getpid)); +} + #ifndef PTRACE_O_TRACESECCOMP #define PTRACE_O_TRACESECCOMP 0x00000080 #endif @@ -1262,6 +1546,13 @@ TEST_F(TRACE_poke, getpid_runs_normally) # error "Do not know how to find your architecture's registers and syscalls" #endif +/* When the syscall return can't be changed, stub out the tests for it. */ +#ifdef SYSCALL_NUM_RET_SHARE_REG +# define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(-1, action) +#else +# define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(val, action) +#endif + /* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux). */ @@ -1357,7 +1648,7 @@ void change_syscall(struct __test_metadata *_metadata, #ifdef SYSCALL_NUM_RET_SHARE_REG TH_LOG("Can't modify syscall return on this architecture"); #else - regs.SYSCALL_RET = 1; + regs.SYSCALL_RET = EPERM; #endif #ifdef HAVE_GETREGS @@ -1426,6 +1717,8 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, if (nr == __NR_getpid) change_syscall(_metadata, tracee, __NR_getppid); + if (nr == __NR_open) + change_syscall(_metadata, tracee, -1); } FIXTURE_DATA(TRACE_syscall) { @@ -1480,6 +1773,28 @@ FIXTURE_TEARDOWN(TRACE_syscall) free(self->prog.filter); } +TEST_F(TRACE_syscall, ptrace_syscall_redirected) +{ + /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ + teardown_trace_fixture(_metadata, self->tracer); + self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, + true); + + /* Tracer will redirect getpid to getppid. */ + EXPECT_NE(self->mypid, syscall(__NR_getpid)); +} + +TEST_F(TRACE_syscall, ptrace_syscall_dropped) +{ + /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ + teardown_trace_fixture(_metadata, self->tracer); + self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, + true); + + /* Tracer should skip the open syscall, resulting in EPERM. */ + EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_open)); +} + TEST_F(TRACE_syscall, syscall_allowed) { long ret; @@ -1520,13 +1835,8 @@ TEST_F(TRACE_syscall, syscall_dropped) ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); ASSERT_EQ(0, ret); -#ifdef SYSCALL_NUM_RET_SHARE_REG - /* gettid has been skipped */ - EXPECT_EQ(-1, syscall(__NR_gettid)); -#else /* gettid has been skipped and an altered return value stored. */ - EXPECT_EQ(1, syscall(__NR_gettid)); -#endif + EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_gettid)); EXPECT_NE(self->mytid, syscall(__NR_gettid)); } @@ -1557,6 +1867,7 @@ TEST_F(TRACE_syscall, skip_after_RET_TRACE) ASSERT_EQ(0, ret); /* Tracer will redirect getpid to getppid, and we should see EPERM. */ + errno = 0; EXPECT_EQ(-1, syscall(__NR_getpid)); EXPECT_EQ(EPERM, errno); } @@ -1654,47 +1965,6 @@ TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS) EXPECT_NE(self->mypid, syscall(__NR_getpid)); } -#ifndef __NR_seccomp -# if defined(__i386__) -# define __NR_seccomp 354 -# elif defined(__x86_64__) -# define __NR_seccomp 317 -# elif defined(__arm__) -# define __NR_seccomp 383 -# elif defined(__aarch64__) -# define __NR_seccomp 277 -# elif defined(__hppa__) -# define __NR_seccomp 338 -# elif defined(__powerpc__) -# define __NR_seccomp 358 -# elif defined(__s390__) -# define __NR_seccomp 348 -# else -# warning "seccomp syscall number unknown for this architecture" -# define __NR_seccomp 0xffff -# endif -#endif - -#ifndef SECCOMP_SET_MODE_STRICT -#define SECCOMP_SET_MODE_STRICT 0 -#endif - -#ifndef SECCOMP_SET_MODE_FILTER -#define SECCOMP_SET_MODE_FILTER 1 -#endif - -#ifndef SECCOMP_FILTER_FLAG_TSYNC -#define SECCOMP_FILTER_FLAG_TSYNC 1 -#endif - -#ifndef seccomp -int seccomp(unsigned int op, unsigned int flags, void *args) -{ - errno = 0; - return syscall(__NR_seccomp, op, flags, args); -} -#endif - TEST(seccomp_syscall) { struct sock_filter filter[] = { @@ -1783,6 +2053,67 @@ TEST(seccomp_syscall_mode_lock) } } +/* + * Test detection of known and unknown filter flags. Userspace needs to be able + * to check if a filter flag is supported by the current kernel and a good way + * of doing that is by attempting to enter filter mode, with the flag bit in + * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates + * that the flag is valid and EINVAL indicates that the flag is invalid. + */ +TEST(detect_seccomp_filter_flags) +{ + unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC, + SECCOMP_FILTER_FLAG_LOG }; + unsigned int flag, all_flags; + int i; + long ret; + + /* Test detection of known-good filter flags */ + for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) { + flag = flags[i]; + ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } + EXPECT_EQ(-1, ret); + EXPECT_EQ(EFAULT, errno) { + TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!", + flag); + } + + all_flags |= flag; + } + + /* Test detection of all known-good filter flags */ + ret = seccomp(SECCOMP_SET_MODE_FILTER, all_flags, NULL); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EFAULT, errno) { + TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!", + all_flags); + } + + /* Test detection of an unknown filter flag */ + flag = -1; + ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!", + flag); + } + + /* + * Test detection of an unknown filter flag that may simply need to be + * added to this test + */ + flag = flags[ARRAY_SIZE(flags) - 1] << 1; + ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?", + flag); + } +} + TEST(TSYNC_first) { struct sock_filter filter[] = { @@ -2421,6 +2752,99 @@ TEST(syscall_restart) _metadata->passed = 0; } +TEST_SIGNAL(filter_flag_log, SIGSYS) +{ + struct sock_filter allow_filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_filter kill_filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog allow_prog = { + .len = (unsigned short)ARRAY_SIZE(allow_filter), + .filter = allow_filter, + }; + struct sock_fprog kill_prog = { + .len = (unsigned short)ARRAY_SIZE(kill_filter), + .filter = kill_filter, + }; + long ret; + pid_t parent = getppid(); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + /* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */ + ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG, + &allow_prog); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } + EXPECT_NE(0, ret) { + TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!"); + } + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!"); + } + + /* Verify that a simple, permissive filter can be added with no flags */ + ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog); + EXPECT_EQ(0, ret); + + /* See if the same filter can be added with the FILTER_FLAG_LOG flag */ + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG, + &allow_prog); + ASSERT_NE(EINVAL, errno) { + TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!"); + } + EXPECT_EQ(0, ret); + + /* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */ + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG, + &kill_prog); + EXPECT_EQ(0, ret); + + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* getpid() should never return. */ + EXPECT_EQ(0, syscall(__NR_getpid)); +} + +TEST(get_action_avail) +{ + __u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP, + SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE, + SECCOMP_RET_LOG, SECCOMP_RET_ALLOW }; + __u32 unknown_action = 0x10000000U; + int i; + long ret; + + ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]); + ASSERT_NE(ENOSYS, errno) { + TH_LOG("Kernel does not support seccomp syscall!"); + } + ASSERT_NE(EINVAL, errno) { + TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!"); + } + EXPECT_EQ(ret, 0); + + for (i = 0; i < ARRAY_SIZE(actions); i++) { + ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]); + EXPECT_EQ(ret, 0) { + TH_LOG("Expected action (0x%X) not available!", + actions[i]); + } + } + + /* Check that an unknown action is handled properly (EOPNOTSUPP) */ + ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action); + EXPECT_EQ(ret, -1); + EXPECT_EQ(errno, EOPNOTSUPP); +} + /* * TODO: * - add microbenchmarks @@ -2429,6 +2853,8 @@ TEST(syscall_restart) * - endianness checking when appropriate * - 64-bit arg prodding * - arch value testing (x86 modes especially) + * - verify that FILTER_FLAG_LOG filters generate log messages + * - verify that RET_LOG generates log messages * - ... */ diff --git a/tools/testing/selftests/sigaltstack/sas.c b/tools/testing/selftests/sigaltstack/sas.c index ccd07343d418..97bb150837df 100644 --- a/tools/testing/selftests/sigaltstack/sas.c +++ b/tools/testing/selftests/sigaltstack/sas.c @@ -17,6 +17,8 @@ #include <assert.h> #include <errno.h> +#include "../kselftest.h" + #ifndef SS_AUTODISARM #define SS_AUTODISARM (1U << 31) #endif @@ -37,12 +39,15 @@ void my_usr1(int sig, siginfo_t *si, void *u) stack_t stk; struct stk_data *p; +#if __s390x__ + register unsigned long sp asm("%15"); +#else register unsigned long sp asm("sp"); +#endif if (sp < (unsigned long)sstack || sp >= (unsigned long)sstack + SIGSTKSZ) { - printf("[FAIL]\tSP is not on sigaltstack\n"); - exit(EXIT_FAILURE); + ksft_exit_fail_msg("SP is not on sigaltstack\n"); } /* put some data on stack. other sighandler will try to overwrite it */ aa = alloca(1024); @@ -50,21 +55,22 @@ void my_usr1(int sig, siginfo_t *si, void *u) p = (struct stk_data *)(aa + 512); strcpy(p->msg, msg); p->flag = 1; - printf("[RUN]\tsignal USR1\n"); + ksft_print_msg("[RUN]\tsignal USR1\n"); err = sigaltstack(NULL, &stk); if (err) { - perror("[FAIL]\tsigaltstack()"); + ksft_exit_fail_msg("sigaltstack() - %s\n", strerror(errno)); exit(EXIT_FAILURE); } if (stk.ss_flags != SS_DISABLE) - printf("[FAIL]\tss_flags=%x, should be SS_DISABLE\n", + ksft_test_result_fail("tss_flags=%x, should be SS_DISABLE\n", stk.ss_flags); else - printf("[OK]\tsigaltstack is disabled in sighandler\n"); + ksft_test_result_pass( + "sigaltstack is disabled in sighandler\n"); swapcontext(&sc, &uc); - printf("%s\n", p->msg); + ksft_print_msg("%s\n", p->msg); if (!p->flag) { - printf("[RUN]\tAborting\n"); + ksft_exit_skip("[RUN]\tAborting\n"); exit(EXIT_FAILURE); } } @@ -74,13 +80,13 @@ void my_usr2(int sig, siginfo_t *si, void *u) char *aa; struct stk_data *p; - printf("[RUN]\tsignal USR2\n"); + ksft_print_msg("[RUN]\tsignal USR2\n"); aa = alloca(1024); /* dont run valgrind on this */ /* try to find the data stored by previous sighandler */ p = memmem(aa, 1024, msg, strlen(msg)); if (p) { - printf("[FAIL]\tsigaltstack re-used\n"); + ksft_test_result_fail("sigaltstack re-used\n"); /* corrupt the data */ strcpy(p->msg, msg2); /* tell other sighandler that his data is corrupted */ @@ -90,7 +96,7 @@ void my_usr2(int sig, siginfo_t *si, void *u) static void switch_fn(void) { - printf("[RUN]\tswitched to user ctx\n"); + ksft_print_msg("[RUN]\tswitched to user ctx\n"); raise(SIGUSR2); setcontext(&sc); } @@ -101,6 +107,8 @@ int main(void) stack_t stk; int err; + ksft_print_header(); + sigemptyset(&act.sa_mask); act.sa_flags = SA_ONSTACK | SA_SIGINFO; act.sa_sigaction = my_usr1; @@ -110,19 +118,20 @@ int main(void) sstack = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); if (sstack == MAP_FAILED) { - perror("mmap()"); + ksft_exit_fail_msg("mmap() - %s\n", strerror(errno)); return EXIT_FAILURE; } err = sigaltstack(NULL, &stk); if (err) { - perror("[FAIL]\tsigaltstack()"); + ksft_exit_fail_msg("sigaltstack() - %s\n", strerror(errno)); exit(EXIT_FAILURE); } if (stk.ss_flags == SS_DISABLE) { - printf("[OK]\tInitial sigaltstack state was SS_DISABLE\n"); + ksft_test_result_pass( + "Initial sigaltstack state was SS_DISABLE\n"); } else { - printf("[FAIL]\tInitial sigaltstack state was %x; " + ksft_exit_fail_msg("Initial sigaltstack state was %x; " "should have been SS_DISABLE\n", stk.ss_flags); return EXIT_FAILURE; } @@ -133,7 +142,8 @@ int main(void) err = sigaltstack(&stk, NULL); if (err) { if (errno == EINVAL) { - printf("[NOTE]\tThe running kernel doesn't support SS_AUTODISARM\n"); + ksft_exit_skip( + "[NOTE]\tThe running kernel doesn't support SS_AUTODISARM\n"); /* * If test cases for the !SS_AUTODISARM variant were * added, we could still run them. We don't have any @@ -142,7 +152,9 @@ int main(void) */ return 0; } else { - perror("[FAIL]\tsigaltstack(SS_ONSTACK | SS_AUTODISARM)"); + ksft_exit_fail_msg( + "sigaltstack(SS_ONSTACK | SS_AUTODISARM) %s\n", + strerror(errno)); return EXIT_FAILURE; } } @@ -150,7 +162,7 @@ int main(void) ustack = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); if (ustack == MAP_FAILED) { - perror("mmap()"); + ksft_exit_fail_msg("mmap() - %s\n", strerror(errno)); return EXIT_FAILURE; } getcontext(&uc); @@ -162,16 +174,17 @@ int main(void) err = sigaltstack(NULL, &stk); if (err) { - perror("[FAIL]\tsigaltstack()"); + ksft_exit_fail_msg("sigaltstack() - %s\n", strerror(errno)); exit(EXIT_FAILURE); } if (stk.ss_flags != SS_AUTODISARM) { - printf("[FAIL]\tss_flags=%x, should be SS_AUTODISARM\n", + ksft_exit_fail_msg("ss_flags=%x, should be SS_AUTODISARM\n", stk.ss_flags); exit(EXIT_FAILURE); } - printf("[OK]\tsigaltstack is still SS_AUTODISARM after signal\n"); + ksft_test_result_pass( + "sigaltstack is still SS_AUTODISARM after signal\n"); - printf("[OK]\tTest passed\n"); + ksft_exit_pass(); return 0; } diff --git a/tools/testing/selftests/splice/.gitignore b/tools/testing/selftests/splice/.gitignore new file mode 100644 index 000000000000..1e23fefd68e8 --- /dev/null +++ b/tools/testing/selftests/splice/.gitignore @@ -0,0 +1 @@ +default_file_splice_read diff --git a/tools/testing/selftests/splice/Makefile b/tools/testing/selftests/splice/Makefile index 9fc78e5e5451..7e1187e007fa 100644 --- a/tools/testing/selftests/splice/Makefile +++ b/tools/testing/selftests/splice/Makefile @@ -1,7 +1,4 @@ TEST_PROGS := default_file_splice_read.sh -EXTRA := default_file_splice_read -all: $(TEST_PROGS) $(EXTRA) +TEST_GEN_PROGS_EXTENDED := default_file_splice_read include ../lib.mk - -EXTRA_CLEAN := $(EXTRA) diff --git a/tools/testing/selftests/sync/Makefile b/tools/testing/selftests/sync/Makefile index 4981c6b6d050..8e04d0afcbd7 100644 --- a/tools/testing/selftests/sync/Makefile +++ b/tools/testing/selftests/sync/Makefile @@ -2,12 +2,16 @@ CFLAGS += -O2 -g -std=gnu89 -pthread -Wall -Wextra CFLAGS += -I../../../../usr/include/ LDFLAGS += -pthread -TEST_PROGS = sync_test - -all: $(TEST_PROGS) +.PHONY: all clean include ../lib.mk +# lib.mk TEST_CUSTOM_PROGS var is for custom tests that need special +# build rules. lib.mk will run and install them. + +TEST_CUSTOM_PROGS := $(OUTPUT)/sync_test +all: $(TEST_CUSTOM_PROGS) + OBJS = sync_test.o sync.o TESTS += sync_alloc.o @@ -18,6 +22,16 @@ TESTS += sync_stress_parallelism.o TESTS += sync_stress_consumer.o TESTS += sync_stress_merge.o -sync_test: $(OBJS) $(TESTS) +OBJS := $(patsubst %,$(OUTPUT)/%,$(OBJS)) +TESTS := $(patsubst %,$(OUTPUT)/%,$(TESTS)) + +$(TEST_CUSTOM_PROGS): $(TESTS) $(OBJS) + $(CC) -o $(TEST_CUSTOM_PROGS) $(OBJS) $(TESTS) $(CFLAGS) $(LDFLAGS) + +$(OBJS): $(OUTPUT)/%.o: %.c + $(CC) -c $^ -o $@ + +$(TESTS): $(OUTPUT)/%.o: %.c + $(CC) -c $^ -o $@ -EXTRA_CLEAN := sync_test $(OBJS) $(TESTS) +EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(OBJS) $(TESTS) diff --git a/tools/testing/selftests/sync/sync_test.c b/tools/testing/selftests/sync/sync_test.c index 62fa666e501a..7f7938263c5c 100644 --- a/tools/testing/selftests/sync/sync_test.c +++ b/tools/testing/selftests/sync/sync_test.c @@ -31,62 +31,83 @@ #include <sys/types.h> #include <sys/stat.h> #include <sys/wait.h> +#include <errno.h> +#include <string.h> +#include "../kselftest.h" #include "synctest.h" static int run_test(int (*test)(void), char *name) { int result; pid_t childpid; + int ret; fflush(stdout); childpid = fork(); if (childpid) { waitpid(childpid, &result, 0); - if (WIFEXITED(result)) - return WEXITSTATUS(result); + if (WIFEXITED(result)) { + ret = WEXITSTATUS(result); + if (!ret) + ksft_test_result_pass("[RUN]\t%s\n", name); + else + ksft_test_result_fail("[RUN]\t%s\n", name); + return ret; + } return 1; } - printf("[RUN]\tExecuting %s\n", name); exit(test()); } -static int sync_api_supported(void) +static void sync_api_supported(void) { struct stat sbuf; + int ret; - return 0 == stat("/sys/kernel/debug/sync/sw_sync", &sbuf); + ret = stat("/sys/kernel/debug/sync/sw_sync", &sbuf); + if (!ret) + return; + + if (errno == ENOENT) + ksft_exit_skip("Sync framework not supported by kernel\n"); + + if (errno == EACCES) + ksft_exit_skip("Run Sync test as root.\n"); + + ksft_exit_fail_msg("stat failed on /sys/kernel/debug/sync/sw_sync: %s", + strerror(errno)); } int main(void) { - int err = 0; + int err; - if (!sync_api_supported()) { - printf("SKIP: Sync framework not supported by kernel\n"); - return 0; - } + ksft_print_header(); + + sync_api_supported(); - printf("[RUN]\tTesting sync framework\n"); + ksft_print_msg("[RUN]\tTesting sync framework\n"); - err += RUN_TEST(test_alloc_timeline); - err += RUN_TEST(test_alloc_fence); - err += RUN_TEST(test_alloc_fence_negative); + RUN_TEST(test_alloc_timeline); + RUN_TEST(test_alloc_fence); + RUN_TEST(test_alloc_fence_negative); - err += RUN_TEST(test_fence_one_timeline_wait); - err += RUN_TEST(test_fence_one_timeline_merge); - err += RUN_TEST(test_fence_merge_same_fence); - err += RUN_TEST(test_fence_multi_timeline_wait); - err += RUN_TEST(test_stress_two_threads_shared_timeline); - err += RUN_TEST(test_consumer_stress_multi_producer_single_consumer); - err += RUN_TEST(test_merge_stress_random_merge); + RUN_TEST(test_fence_one_timeline_wait); + RUN_TEST(test_fence_one_timeline_merge); + RUN_TEST(test_fence_merge_same_fence); + RUN_TEST(test_fence_multi_timeline_wait); + RUN_TEST(test_stress_two_threads_shared_timeline); + RUN_TEST(test_consumer_stress_multi_producer_single_consumer); + RUN_TEST(test_merge_stress_random_merge); + err = ksft_get_fail_cnt(); if (err) - printf("[FAIL]\tsync errors: %d\n", err); - else - printf("[OK]\tsync\n"); + ksft_exit_fail_msg("%d out of %d sync tests failed\n", + err, ksft_test_num()); - return !!err; + /* need this return to keep gcc happy */ + return ksft_exit_pass(); } diff --git a/tools/testing/selftests/sync/synctest.h b/tools/testing/selftests/sync/synctest.h index e7d1d57dba7a..90a8e5369914 100644 --- a/tools/testing/selftests/sync/synctest.h +++ b/tools/testing/selftests/sync/synctest.h @@ -29,10 +29,11 @@ #define SELFTESTS_SYNCTEST_H #include <stdio.h> +#include "../kselftest.h" #define ASSERT(cond, msg) do { \ if (!(cond)) { \ - printf("[ERROR]\t%s", (msg)); \ + ksft_print_msg("[ERROR]\t%s", (msg)); \ return 1; \ } \ } while (0) diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/tests.json b/tools/testing/selftests/tc-testing/tc-tests/actions/tests.json index af519bc97a8e..6973bdc5b5bf 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/tests.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/tests.json @@ -1111,5 +1111,55 @@ "teardown": [ "$TC actions flush action gact" ] + }, + { + "id": "a568", + "name": "Add action with ife type", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ], + "$TC actions add action ife encode type 0xDEAD index 1" + ], + "cmdUnderTest": "$TC actions get action ife index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "type 0xDEAD", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] + }, + { + "id": "b983", + "name": "Add action without ife type", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ], + "$TC actions add action ife encode index 1" + ], + "cmdUnderTest": "$TC actions get action ife index 1", + "expExitCode": "0", + "verifyCmd": "$TC actions get action ife index 1", + "matchPattern": "type 0xED3E", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] } ]
\ No newline at end of file diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile index a9b86133b9b3..ae4593115408 100644 --- a/tools/testing/selftests/timers/Makefile +++ b/tools/testing/selftests/timers/Makefile @@ -1,5 +1,4 @@ -BUILD_FLAGS = -DKTEST -CFLAGS += -O3 -Wl,-no-as-needed -Wall $(BUILD_FLAGS) +CFLAGS += -O3 -Wl,-no-as-needed -Wall LDFLAGS += -lrt -lpthread -lm # these are all "safe" tests that don't modify @@ -7,9 +6,11 @@ LDFLAGS += -lrt -lpthread -lm TEST_GEN_PROGS = posix_timers nanosleep nsleep-lat set-timer-lat mqueue-lat \ inconsistency-check raw_skew threadtest rtctest -TEST_GEN_PROGS_EXTENDED = alarmtimer-suspend valid-adjtimex adjtick change_skew \ +DESTRUCTIVE_TESTS = alarmtimer-suspend valid-adjtimex adjtick change_skew \ skew_consistency clocksource-switch freq-step leap-a-day \ - leapcrash set-tai set-2038 set-tz rtctest_setdate + leapcrash set-tai set-2038 set-tz + +TEST_GEN_PROGS_EXTENDED = $(DESTRUCTIVE_TESTS) rtctest_setdate include ../lib.mk @@ -18,16 +19,4 @@ include ../lib.mk # and may modify the system time or trigger # other behavior like suspend run_destructive_tests: run_tests - ./alarmtimer-suspend - ./valid-adjtimex - ./adjtick - ./change_skew - ./skew_consistency - ./clocksource-switch - ./freq-step - ./leap-a-day -s -i 10 - ./leapcrash - ./set-tz - ./set-tai - ./set-2038 - + $(call RUN_TESTS, $(DESTRUCTIVE_TESTS)) diff --git a/tools/testing/selftests/timers/adjtick.c b/tools/testing/selftests/timers/adjtick.c index 9887fd538fec..0caca3a06bd2 100644 --- a/tools/testing/selftests/timers/adjtick.c +++ b/tools/testing/selftests/timers/adjtick.c @@ -23,18 +23,7 @@ #include <sys/timex.h> #include <time.h> -#ifdef KTEST #include "../kselftest.h" -#else -static inline int ksft_exit_pass(void) -{ - exit(0); -} -static inline int ksft_exit_fail(void) -{ - exit(1); -} -#endif #define CLOCK_MONOTONIC_RAW 4 diff --git a/tools/testing/selftests/timers/alarmtimer-suspend.c b/tools/testing/selftests/timers/alarmtimer-suspend.c index 2b361b830395..4da09dbf83ba 100644 --- a/tools/testing/selftests/timers/alarmtimer-suspend.c +++ b/tools/testing/selftests/timers/alarmtimer-suspend.c @@ -28,18 +28,7 @@ #include <signal.h> #include <stdlib.h> #include <pthread.h> -#ifdef KTEST #include "../kselftest.h" -#else -static inline int ksft_exit_pass(void) -{ - exit(0); -} -static inline int ksft_exit_fail(void) -{ - exit(1); -} -#endif #define CLOCK_REALTIME 0 #define CLOCK_MONOTONIC 1 diff --git a/tools/testing/selftests/timers/change_skew.c b/tools/testing/selftests/timers/change_skew.c index cb1968977c04..c4eab7124990 100644 --- a/tools/testing/selftests/timers/change_skew.c +++ b/tools/testing/selftests/timers/change_skew.c @@ -28,18 +28,7 @@ #include <sys/time.h> #include <sys/timex.h> #include <time.h> -#ifdef KTEST #include "../kselftest.h" -#else -static inline int ksft_exit_pass(void) -{ - exit(0); -} -static inline int ksft_exit_fail(void) -{ - exit(1); -} -#endif #define NSEC_PER_SEC 1000000000LL diff --git a/tools/testing/selftests/timers/clocksource-switch.c b/tools/testing/selftests/timers/clocksource-switch.c index 5ff165373f8b..bfc974b4572d 100644 --- a/tools/testing/selftests/timers/clocksource-switch.c +++ b/tools/testing/selftests/timers/clocksource-switch.c @@ -34,18 +34,7 @@ #include <fcntl.h> #include <string.h> #include <sys/wait.h> -#ifdef KTEST #include "../kselftest.h" -#else -static inline int ksft_exit_pass(void) -{ - exit(0); -} -static inline int ksft_exit_fail(void) -{ - exit(1); -} -#endif int get_clocksources(char list[][30]) @@ -61,7 +50,7 @@ int get_clocksources(char list[][30]) close(fd); - for (i = 0; i < 30; i++) + for (i = 0; i < 10; i++) list[i][0] = '\0'; head = buf; diff --git a/tools/testing/selftests/timers/inconsistency-check.c b/tools/testing/selftests/timers/inconsistency-check.c index 74c60e8759a0..022d3ffe3fbf 100644 --- a/tools/testing/selftests/timers/inconsistency-check.c +++ b/tools/testing/selftests/timers/inconsistency-check.c @@ -28,18 +28,7 @@ #include <sys/timex.h> #include <string.h> #include <signal.h> -#ifdef KTEST #include "../kselftest.h" -#else -static inline int ksft_exit_pass(void) -{ - exit(0); -} -static inline int ksft_exit_fail(void) -{ - exit(1); -} -#endif #define CALLS_PER_LOOP 64 #define NSEC_PER_SEC 1000000000ULL diff --git a/tools/testing/selftests/timers/leap-a-day.c b/tools/testing/selftests/timers/leap-a-day.c index fb46ad6ac92c..19e46ed5dfb5 100644 --- a/tools/testing/selftests/timers/leap-a-day.c +++ b/tools/testing/selftests/timers/leap-a-day.c @@ -48,18 +48,7 @@ #include <string.h> #include <signal.h> #include <unistd.h> -#ifdef KTEST #include "../kselftest.h" -#else -static inline int ksft_exit_pass(void) -{ - exit(0); -} -static inline int ksft_exit_fail(void) -{ - exit(1); -} -#endif #define NSEC_PER_SEC 1000000000ULL #define CLOCK_TAI 11 @@ -190,18 +179,18 @@ int main(int argc, char **argv) struct sigevent se; struct sigaction act; int signum = SIGRTMAX; - int settime = 0; + int settime = 1; int tai_time = 0; int insert = 1; - int iterations = -1; + int iterations = 10; int opt; /* Process arguments */ while ((opt = getopt(argc, argv, "sti:")) != -1) { switch (opt) { - case 's': - printf("Setting time to speed up testing\n"); - settime = 1; + case 'w': + printf("Only setting leap-flag, not changing time. It could take up to a day for leap to trigger.\n"); + settime = 0; break; case 'i': iterations = atoi(optarg); @@ -210,9 +199,10 @@ int main(int argc, char **argv) tai_time = 1; break; default: - printf("Usage: %s [-s] [-i <iterations>]\n", argv[0]); - printf(" -s: Set time to right before leap second each iteration\n"); - printf(" -i: Number of iterations\n"); + printf("Usage: %s [-w] [-i <iterations>]\n", argv[0]); + printf(" -w: Set flag and wait for leap second each iteration"); + printf(" (default sets time to right before leapsecond)\n"); + printf(" -i: Number of iterations (-1 = infinite, default is 10)\n"); printf(" -t: Print TAI time\n"); exit(-1); } diff --git a/tools/testing/selftests/timers/leapcrash.c b/tools/testing/selftests/timers/leapcrash.c index a1071bdbdeb7..830c462f605d 100644 --- a/tools/testing/selftests/timers/leapcrash.c +++ b/tools/testing/selftests/timers/leapcrash.c @@ -22,20 +22,7 @@ #include <sys/timex.h> #include <string.h> #include <signal.h> -#ifdef KTEST #include "../kselftest.h" -#else -static inline int ksft_exit_pass(void) -{ - exit(0); -} -static inline int ksft_exit_fail(void) -{ - exit(1); -} -#endif - - /* clear NTP time_status & time_state */ int clear_time_state(void) diff --git a/tools/testing/selftests/timers/mqueue-lat.c b/tools/testing/selftests/timers/mqueue-lat.c index a2a3924d0b41..1867db5d6f5e 100644 --- a/tools/testing/selftests/timers/mqueue-lat.c +++ b/tools/testing/selftests/timers/mqueue-lat.c @@ -29,18 +29,7 @@ #include <signal.h> #include <errno.h> #include <mqueue.h> -#ifdef KTEST #include "../kselftest.h" -#else -static inline int ksft_exit_pass(void) -{ - exit(0); -} -static inline int ksft_exit_fail(void) -{ - exit(1); -} -#endif #define NSEC_PER_SEC 1000000000ULL diff --git a/tools/testing/selftests/timers/nanosleep.c b/tools/testing/selftests/timers/nanosleep.c index ff942ff7c9b3..8adb0bb51d4d 100644 --- a/tools/testing/selftests/timers/nanosleep.c +++ b/tools/testing/selftests/timers/nanosleep.c @@ -27,18 +27,7 @@ #include <sys/timex.h> #include <string.h> #include <signal.h> -#ifdef KTEST #include "../kselftest.h" -#else -static inline int ksft_exit_pass(void) -{ - exit(0); -} -static inline int ksft_exit_fail(void) -{ - exit(1); -} -#endif #define NSEC_PER_SEC 1000000000ULL diff --git a/tools/testing/selftests/timers/nsleep-lat.c b/tools/testing/selftests/timers/nsleep-lat.c index 2d7898fda0f1..c3c3dc10db17 100644 --- a/tools/testing/selftests/timers/nsleep-lat.c +++ b/tools/testing/selftests/timers/nsleep-lat.c @@ -24,18 +24,7 @@ #include <sys/timex.h> #include <string.h> #include <signal.h> -#ifdef KTEST #include "../kselftest.h" -#else -static inline int ksft_exit_pass(void) -{ - exit(0); -} -static inline int ksft_exit_fail(void) -{ - exit(1); -} -#endif #define NSEC_PER_SEC 1000000000ULL diff --git a/tools/testing/selftests/timers/raw_skew.c b/tools/testing/selftests/timers/raw_skew.c index 30906bfd9c1b..ca6cd146aafe 100644 --- a/tools/testing/selftests/timers/raw_skew.c +++ b/tools/testing/selftests/timers/raw_skew.c @@ -25,19 +25,7 @@ #include <sys/time.h> #include <sys/timex.h> #include <time.h> -#ifdef KTEST #include "../kselftest.h" -#else -static inline int ksft_exit_pass(void) -{ - exit(0); -} -static inline int ksft_exit_fail(void) -{ - exit(1); -} -#endif - #define CLOCK_MONOTONIC_RAW 4 #define NSEC_PER_SEC 1000000000LL diff --git a/tools/testing/selftests/timers/rtctest.c b/tools/testing/selftests/timers/rtctest.c index f61170f7b024..411eff625e66 100644 --- a/tools/testing/selftests/timers/rtctest.c +++ b/tools/testing/selftests/timers/rtctest.c @@ -221,6 +221,11 @@ test_READ: /* Read the current alarm settings */ retval = ioctl(fd, RTC_ALM_READ, &rtc_tm); if (retval == -1) { + if (errno == EINVAL) { + fprintf(stderr, + "\n...EINVAL reading current alarm setting.\n"); + goto test_PIE; + } perror("RTC_ALM_READ ioctl"); exit(errno); } @@ -231,7 +236,7 @@ test_READ: /* Enable alarm interrupts */ retval = ioctl(fd, RTC_AIE_ON, 0); if (retval == -1) { - if (errno == EINVAL) { + if (errno == EINVAL || errno == EIO) { fprintf(stderr, "\n...Alarm IRQs not supported.\n"); goto test_PIE; diff --git a/tools/testing/selftests/timers/set-2038.c b/tools/testing/selftests/timers/set-2038.c index c8a7e14446b1..688cfd81b531 100644 --- a/tools/testing/selftests/timers/set-2038.c +++ b/tools/testing/selftests/timers/set-2038.c @@ -27,18 +27,7 @@ #include <unistd.h> #include <time.h> #include <sys/time.h> -#ifdef KTEST #include "../kselftest.h" -#else -static inline int ksft_exit_pass(void) -{ - exit(0); -} -static inline int ksft_exit_fail(void) -{ - exit(1); -} -#endif #define NSEC_PER_SEC 1000000000LL diff --git a/tools/testing/selftests/timers/set-tai.c b/tools/testing/selftests/timers/set-tai.c index dc88dbc8831f..70fed27d8fd3 100644 --- a/tools/testing/selftests/timers/set-tai.c +++ b/tools/testing/selftests/timers/set-tai.c @@ -23,18 +23,7 @@ #include <string.h> #include <signal.h> #include <unistd.h> -#ifdef KTEST #include "../kselftest.h" -#else -static inline int ksft_exit_pass(void) -{ - exit(0); -} -static inline int ksft_exit_fail(void) -{ - exit(1); -} -#endif int set_tai(int offset) { diff --git a/tools/testing/selftests/timers/set-timer-lat.c b/tools/testing/selftests/timers/set-timer-lat.c index 15434da23b04..50da45437daa 100644 --- a/tools/testing/selftests/timers/set-timer-lat.c +++ b/tools/testing/selftests/timers/set-timer-lat.c @@ -28,18 +28,7 @@ #include <signal.h> #include <stdlib.h> #include <pthread.h> -#ifdef KTEST #include "../kselftest.h" -#else -static inline int ksft_exit_pass(void) -{ - exit(0); -} -static inline int ksft_exit_fail(void) -{ - exit(1); -} -#endif #define CLOCK_REALTIME 0 #define CLOCK_MONOTONIC 1 @@ -154,7 +143,8 @@ int setup_timer(int clock_id, int flags, int interval, timer_t *tm1) printf("%-22s %s missing CAP_WAKE_ALARM? : [UNSUPPORTED]\n", clockstring(clock_id), flags ? "ABSTIME":"RELTIME"); - return 0; + /* Indicate timer isn't set, so caller doesn't wait */ + return 1; } printf("%s - timer_create() failed\n", clockstring(clock_id)); return -1; @@ -224,8 +214,9 @@ int do_timer(int clock_id, int flags) int err; err = setup_timer(clock_id, flags, interval, &tm1); + /* Unsupported case - return 0 to not fail the test */ if (err) - return err; + return err == 1 ? 0 : err; while (alarmcount < 5) sleep(1); @@ -239,18 +230,17 @@ int do_timer_oneshot(int clock_id, int flags) timer_t tm1; const int interval = 0; struct timeval timeout; - fd_set fds; int err; err = setup_timer(clock_id, flags, interval, &tm1); + /* Unsupported case - return 0 to not fail the test */ if (err) - return err; + return err == 1 ? 0 : err; memset(&timeout, 0, sizeof(timeout)); timeout.tv_sec = 5; - FD_ZERO(&fds); do { - err = select(FD_SETSIZE, &fds, NULL, NULL, &timeout); + err = select(0, NULL, NULL, NULL, &timeout); } while (err == -1 && errno == EINTR); timer_delete(tm1); diff --git a/tools/testing/selftests/timers/set-tz.c b/tools/testing/selftests/timers/set-tz.c index f4184928b16b..877fd5532fee 100644 --- a/tools/testing/selftests/timers/set-tz.c +++ b/tools/testing/selftests/timers/set-tz.c @@ -23,18 +23,7 @@ #include <string.h> #include <signal.h> #include <unistd.h> -#ifdef KTEST #include "../kselftest.h" -#else -static inline int ksft_exit_pass(void) -{ - exit(0); -} -static inline int ksft_exit_fail(void) -{ - exit(1); -} -#endif int set_tz(int min, int dst) { diff --git a/tools/testing/selftests/timers/skew_consistency.c b/tools/testing/selftests/timers/skew_consistency.c index 2a996e072259..022b711c78ee 100644 --- a/tools/testing/selftests/timers/skew_consistency.c +++ b/tools/testing/selftests/timers/skew_consistency.c @@ -35,18 +35,7 @@ #include <stdlib.h> #include <string.h> #include <sys/wait.h> -#ifdef KTEST #include "../kselftest.h" -#else -static inline int ksft_exit_pass(void) -{ - exit(0); -} -static inline int ksft_exit_fail(void) -{ - exit(1); -} -#endif #define NSEC_PER_SEC 1000000000LL diff --git a/tools/testing/selftests/timers/threadtest.c b/tools/testing/selftests/timers/threadtest.c index e632e116f05e..759c9c06f1a0 100644 --- a/tools/testing/selftests/timers/threadtest.c +++ b/tools/testing/selftests/timers/threadtest.c @@ -21,19 +21,7 @@ #include <stdlib.h> #include <sys/time.h> #include <pthread.h> -#ifdef KTEST #include "../kselftest.h" -#else -static inline int ksft_exit_pass(void) -{ - exit(0); -} -static inline int ksft_exit_fail(void) -{ - exit(1); -} -#endif - /* serializes shared list access */ pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER; diff --git a/tools/testing/selftests/timers/valid-adjtimex.c b/tools/testing/selftests/timers/valid-adjtimex.c index 60fe3c569bd9..d9d3ab93b31a 100644 --- a/tools/testing/selftests/timers/valid-adjtimex.c +++ b/tools/testing/selftests/timers/valid-adjtimex.c @@ -32,18 +32,7 @@ #include <string.h> #include <signal.h> #include <unistd.h> -#ifdef KTEST #include "../kselftest.h" -#else -static inline int ksft_exit_pass(void) -{ - exit(0); -} -static inline int ksft_exit_fail(void) -{ - exit(1); -} -#endif #define NSEC_PER_SEC 1000000000LL #define USEC_PER_SEC 1000000LL diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 1eae79ae5b4e..a2c53a3d223d 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -66,6 +66,8 @@ #include <sys/wait.h> #include <pthread.h> #include <linux/userfaultfd.h> +#include <setjmp.h> +#include <stdbool.h> #ifdef __NR_userfaultfd @@ -82,11 +84,17 @@ static int bounces; #define TEST_SHMEM 3 static int test_type; +/* exercise the test_uffdio_*_eexist every ALARM_INTERVAL_SECS */ +#define ALARM_INTERVAL_SECS 10 +static volatile bool test_uffdio_copy_eexist = true; +static volatile bool test_uffdio_zeropage_eexist = true; + +static bool map_shared; static int huge_fd; static char *huge_fd_off0; static unsigned long long *count_verify; static int uffd, uffd_flags, finished, *pipefd; -static char *area_src, *area_dst; +static char *area_src, *area_src_alias, *area_dst, *area_dst_alias; static char *zeropage; pthread_attr_t attr; @@ -125,6 +133,9 @@ static void anon_allocate_area(void **alloc_area) } } +static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset) +{ +} /* HugeTLB memory */ static int hugetlb_release_pages(char *rel_area) @@ -145,17 +156,51 @@ static int hugetlb_release_pages(char *rel_area) static void hugetlb_allocate_area(void **alloc_area) { + void *area_alias = NULL; + char **alloc_area_alias; *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_HUGETLB, huge_fd, - *alloc_area == area_src ? 0 : - nr_pages * page_size); + (map_shared ? MAP_SHARED : MAP_PRIVATE) | + MAP_HUGETLB, + huge_fd, *alloc_area == area_src ? 0 : + nr_pages * page_size); if (*alloc_area == MAP_FAILED) { fprintf(stderr, "mmap of hugetlbfs file failed\n"); *alloc_area = NULL; } - if (*alloc_area == area_src) + if (map_shared) { + area_alias = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_HUGETLB, + huge_fd, *alloc_area == area_src ? 0 : + nr_pages * page_size); + if (area_alias == MAP_FAILED) { + if (munmap(*alloc_area, nr_pages * page_size) < 0) + perror("hugetlb munmap"), exit(1); + *alloc_area = NULL; + return; + } + } + if (*alloc_area == area_src) { huge_fd_off0 = *alloc_area; + alloc_area_alias = &area_src_alias; + } else { + alloc_area_alias = &area_dst_alias; + } + if (area_alias) + *alloc_area_alias = area_alias; +} + +static void hugetlb_alias_mapping(__u64 *start, size_t len, unsigned long offset) +{ + if (!map_shared) + return; + /* + * We can't zap just the pagetable with hugetlbfs because + * MADV_DONTEED won't work. So exercise -EEXIST on a alias + * mapping where the pagetables are not established initially, + * this way we'll exercise the -EEXEC at the fs level. + */ + *start = (unsigned long) area_dst_alias + offset; } /* Shared memory */ @@ -185,6 +230,7 @@ struct uffd_test_ops { unsigned long expected_ioctls; void (*allocate_area)(void **alloc_area); int (*release_pages)(char *rel_area); + void (*alias_mapping)(__u64 *start, size_t len, unsigned long offset); }; #define ANON_EXPECTED_IOCTLS ((1 << _UFFDIO_WAKE) | \ @@ -195,18 +241,21 @@ static struct uffd_test_ops anon_uffd_test_ops = { .expected_ioctls = ANON_EXPECTED_IOCTLS, .allocate_area = anon_allocate_area, .release_pages = anon_release_pages, + .alias_mapping = noop_alias_mapping, }; static struct uffd_test_ops shmem_uffd_test_ops = { - .expected_ioctls = UFFD_API_RANGE_IOCTLS_BASIC, + .expected_ioctls = ANON_EXPECTED_IOCTLS, .allocate_area = shmem_allocate_area, .release_pages = shmem_release_pages, + .alias_mapping = noop_alias_mapping, }; static struct uffd_test_ops hugetlb_uffd_test_ops = { .expected_ioctls = UFFD_API_RANGE_IOCTLS_BASIC, .allocate_area = hugetlb_allocate_area, .release_pages = hugetlb_release_pages, + .alias_mapping = hugetlb_alias_mapping, }; static struct uffd_test_ops *uffd_test_ops; @@ -331,6 +380,23 @@ static void *locking_thread(void *arg) return NULL; } +static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy, + unsigned long offset) +{ + uffd_test_ops->alias_mapping(&uffdio_copy->dst, + uffdio_copy->len, + offset); + if (ioctl(ufd, UFFDIO_COPY, uffdio_copy)) { + /* real retval in ufdio_copy.copy */ + if (uffdio_copy->copy != -EEXIST) + fprintf(stderr, "UFFDIO_COPY retry error %Ld\n", + uffdio_copy->copy), exit(1); + } else { + fprintf(stderr, "UFFDIO_COPY retry unexpected %Ld\n", + uffdio_copy->copy), exit(1); + } +} + static int copy_page(int ufd, unsigned long offset) { struct uffdio_copy uffdio_copy; @@ -351,8 +417,13 @@ static int copy_page(int ufd, unsigned long offset) } else if (uffdio_copy.copy != page_size) { fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n", uffdio_copy.copy), exit(1); - } else + } else { + if (test_uffdio_copy_eexist) { + test_uffdio_copy_eexist = false; + retry_copy_page(ufd, &uffdio_copy, offset); + } return 1; + } return 0; } @@ -408,6 +479,7 @@ static void *uffd_poll_thread(void *arg) userfaults++; break; case UFFD_EVENT_FORK: + close(uffd); uffd = msg.arg.fork.ufd; pollfd[0].fd = uffd; break; @@ -572,6 +644,17 @@ static int userfaultfd_open(int features) return 0; } +sigjmp_buf jbuf, *sigbuf; + +static void sighndl(int sig, siginfo_t *siginfo, void *ptr) +{ + if (sig == SIGBUS) { + if (sigbuf) + siglongjmp(*sigbuf, 1); + abort(); + } +} + /* * For non-cooperative userfaultfd test we fork() a process that will * generate pagefaults, will mremap the area monitored by the @@ -585,19 +668,59 @@ static int userfaultfd_open(int features) * The release of the pages currently generates event for shmem and * anonymous memory (UFFD_EVENT_REMOVE), hence it is not checked * for hugetlb. + * For signal test(UFFD_FEATURE_SIGBUS), signal_test = 1, we register + * monitored area, generate pagefaults and test that signal is delivered. + * Use UFFDIO_COPY to allocate missing page and retry. For signal_test = 2 + * test robustness use case - we release monitored area, fork a process + * that will generate pagefaults and verify signal is generated. + * This also tests UFFD_FEATURE_EVENT_FORK event along with the signal + * feature. Using monitor thread, verify no userfault events are generated. */ -static int faulting_process(void) +static int faulting_process(int signal_test) { unsigned long nr; unsigned long long count; unsigned long split_nr_pages; + unsigned long lastnr; + struct sigaction act; + unsigned long signalled = 0; if (test_type != TEST_HUGETLB) split_nr_pages = (nr_pages + 1) / 2; else split_nr_pages = nr_pages; + if (signal_test) { + sigbuf = &jbuf; + memset(&act, 0, sizeof(act)); + act.sa_sigaction = sighndl; + act.sa_flags = SA_SIGINFO; + if (sigaction(SIGBUS, &act, 0)) { + perror("sigaction"); + return 1; + } + lastnr = (unsigned long)-1; + } + for (nr = 0; nr < split_nr_pages; nr++) { + if (signal_test) { + if (sigsetjmp(*sigbuf, 1) != 0) { + if (nr == lastnr) { + fprintf(stderr, "Signal repeated\n"); + return 1; + } + + lastnr = nr; + if (signal_test == 1) { + if (copy_page(uffd, nr * page_size)) + signalled++; + } else { + signalled++; + continue; + } + } + } + count = *area_count(area_dst, nr); if (count != count_verify[nr]) { fprintf(stderr, @@ -607,6 +730,9 @@ static int faulting_process(void) } } + if (signal_test) + return signalled != split_nr_pages; + if (test_type == TEST_HUGETLB) return 0; @@ -636,6 +762,23 @@ static int faulting_process(void) return 0; } +static void retry_uffdio_zeropage(int ufd, + struct uffdio_zeropage *uffdio_zeropage, + unsigned long offset) +{ + uffd_test_ops->alias_mapping(&uffdio_zeropage->range.start, + uffdio_zeropage->range.len, + offset); + if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) { + if (uffdio_zeropage->zeropage != -EEXIST) + fprintf(stderr, "UFFDIO_ZEROPAGE retry error %Ld\n", + uffdio_zeropage->zeropage), exit(1); + } else { + fprintf(stderr, "UFFDIO_ZEROPAGE retry unexpected %Ld\n", + uffdio_zeropage->zeropage), exit(1); + } +} + static int uffdio_zeropage(int ufd, unsigned long offset) { struct uffdio_zeropage uffdio_zeropage; @@ -670,8 +813,14 @@ static int uffdio_zeropage(int ufd, unsigned long offset) if (uffdio_zeropage.zeropage != page_size) { fprintf(stderr, "UFFDIO_ZEROPAGE unexpected %Ld\n", uffdio_zeropage.zeropage), exit(1); - } else + } else { + if (test_uffdio_zeropage_eexist) { + test_uffdio_zeropage_eexist = false; + retry_uffdio_zeropage(ufd, &uffdio_zeropage, + offset); + } return 1; + } } else { fprintf(stderr, "UFFDIO_ZEROPAGE succeeded %Ld\n", @@ -761,7 +910,7 @@ static int userfaultfd_events_test(void) perror("fork"), exit(1); if (!pid) - return faulting_process(); + return faulting_process(0); waitpid(pid, &err, 0); if (err) @@ -778,6 +927,72 @@ static int userfaultfd_events_test(void) return userfaults != nr_pages; } +static int userfaultfd_sig_test(void) +{ + struct uffdio_register uffdio_register; + unsigned long expected_ioctls; + unsigned long userfaults; + pthread_t uffd_mon; + int err, features; + pid_t pid; + char c; + + printf("testing signal delivery: "); + fflush(stdout); + + if (uffd_test_ops->release_pages(area_dst)) + return 1; + + features = UFFD_FEATURE_EVENT_FORK|UFFD_FEATURE_SIGBUS; + if (userfaultfd_open(features) < 0) + return 1; + fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); + + uffdio_register.range.start = (unsigned long) area_dst; + uffdio_register.range.len = nr_pages * page_size; + uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) + fprintf(stderr, "register failure\n"), exit(1); + + expected_ioctls = uffd_test_ops->expected_ioctls; + if ((uffdio_register.ioctls & expected_ioctls) != + expected_ioctls) + fprintf(stderr, + "unexpected missing ioctl for anon memory\n"), + exit(1); + + if (faulting_process(1)) + fprintf(stderr, "faulting process failed\n"), exit(1); + + if (uffd_test_ops->release_pages(area_dst)) + return 1; + + if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, NULL)) + perror("uffd_poll_thread create"), exit(1); + + pid = fork(); + if (pid < 0) + perror("fork"), exit(1); + + if (!pid) + exit(faulting_process(2)); + + waitpid(pid, &err, 0); + if (err) + fprintf(stderr, "faulting process failed\n"), exit(1); + + if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) + perror("pipe write"), exit(1); + if (pthread_join(uffd_mon, (void **)&userfaults)) + return 1; + + printf("done.\n"); + if (userfaults) + fprintf(stderr, "Signal test failed, userfaults: %ld\n", + userfaults); + close(uffd); + return userfaults != 0; +} static int userfaultfd_stress(void) { void *area; @@ -879,6 +1094,15 @@ static int userfaultfd_stress(void) return 1; } + if (area_dst_alias) { + uffdio_register.range.start = (unsigned long) + area_dst_alias; + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) { + fprintf(stderr, "register failure alias\n"); + return 1; + } + } + /* * The madvise done previously isn't enough: some * uffd_thread could have read userfaults (one of @@ -912,9 +1136,17 @@ static int userfaultfd_stress(void) /* unregister */ if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range)) { - fprintf(stderr, "register failure\n"); + fprintf(stderr, "unregister failure\n"); return 1; } + if (area_dst_alias) { + uffdio_register.range.start = (unsigned long) area_dst; + if (ioctl(uffd, UFFDIO_UNREGISTER, + &uffdio_register.range)) { + fprintf(stderr, "unregister failure alias\n"); + return 1; + } + } /* verification */ if (bounces & BOUNCE_VERIFY) { @@ -936,6 +1168,10 @@ static int userfaultfd_stress(void) area_src = area_dst; area_dst = tmp_area; + tmp_area = area_src_alias; + area_src_alias = area_dst_alias; + area_dst_alias = tmp_area; + printf("userfaults:"); for (cpu = 0; cpu < nr_cpus; cpu++) printf(" %lu", userfaults[cpu]); @@ -946,7 +1182,8 @@ static int userfaultfd_stress(void) return err; close(uffd); - return userfaultfd_zeropage_test() || userfaultfd_events_test(); + return userfaultfd_zeropage_test() || userfaultfd_sig_test() + || userfaultfd_events_test(); } /* @@ -981,7 +1218,12 @@ static void set_test_type(const char *type) } else if (!strcmp(type, "hugetlb")) { test_type = TEST_HUGETLB; uffd_test_ops = &hugetlb_uffd_test_ops; + } else if (!strcmp(type, "hugetlb_shared")) { + map_shared = true; + test_type = TEST_HUGETLB; + uffd_test_ops = &hugetlb_uffd_test_ops; } else if (!strcmp(type, "shmem")) { + map_shared = true; test_type = TEST_SHMEM; uffd_test_ops = &shmem_uffd_test_ops; } else { @@ -1001,12 +1243,25 @@ static void set_test_type(const char *type) fprintf(stderr, "Impossible to run this test\n"), exit(2); } +static void sigalrm(int sig) +{ + if (sig != SIGALRM) + abort(); + test_uffdio_copy_eexist = true; + test_uffdio_zeropage_eexist = true; + alarm(ALARM_INTERVAL_SECS); +} + int main(int argc, char **argv) { if (argc < 4) fprintf(stderr, "Usage: <test type> <MiB> <bounces> [hugetlbfs_file]\n"), exit(1); + if (signal(SIGALRM, sigalrm) == SIG_ERR) + fprintf(stderr, "failed to arm SIGALRM"), exit(1); + alarm(ALARM_INTERVAL_SECS); + set_test_type(argv[1]); nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); diff --git a/tools/testing/selftests/watchdog/Makefile b/tools/testing/selftests/watchdog/Makefile index f863c664e3d1..ee068511fd0b 100644 --- a/tools/testing/selftests/watchdog/Makefile +++ b/tools/testing/selftests/watchdog/Makefile @@ -1,8 +1,3 @@ -TEST_PROGS := watchdog-test - -all: $(TEST_PROGS) +TEST_GEN_PROGS := watchdog-test include ../lib.mk - -clean: - rm -fr $(TEST_PROGS) diff --git a/tools/testing/selftests/watchdog/watchdog-test.c b/tools/testing/selftests/watchdog/watchdog-test.c index a74c9d739d07..a1391be2dc1e 100644 --- a/tools/testing/selftests/watchdog/watchdog-test.c +++ b/tools/testing/selftests/watchdog/watchdog-test.c @@ -9,12 +9,25 @@ #include <unistd.h> #include <fcntl.h> #include <signal.h> +#include <getopt.h> #include <sys/ioctl.h> #include <linux/types.h> #include <linux/watchdog.h> +#define DEFAULT_PING_RATE 1 + int fd; const char v = 'V'; +static const char sopts[] = "bdehp:t:"; +static const struct option lopts[] = { + {"bootstatus", no_argument, NULL, 'b'}, + {"disable", no_argument, NULL, 'd'}, + {"enable", no_argument, NULL, 'e'}, + {"help", no_argument, NULL, 'h'}, + {"pingrate", required_argument, NULL, 'p'}, + {"timeout", required_argument, NULL, 't'}, + {NULL, no_argument, NULL, 0x0} +}; /* * This function simply sends an IOCTL to the driver, which in turn ticks @@ -23,12 +36,12 @@ const char v = 'V'; */ static void keep_alive(void) { - int dummy; - int ret; + int dummy; + int ret; - ret = ioctl(fd, WDIOC_KEEPALIVE, &dummy); - if (!ret) - printf("."); + ret = ioctl(fd, WDIOC_KEEPALIVE, &dummy); + if (!ret) + printf("."); } /* @@ -38,75 +51,110 @@ static void keep_alive(void) static void term(int sig) { - int ret = write(fd, &v, 1); - - close(fd); - if (ret < 0) - printf("\nStopping watchdog ticks failed (%d)...\n", errno); - else - printf("\nStopping watchdog ticks...\n"); - exit(0); + int ret = write(fd, &v, 1); + + close(fd); + if (ret < 0) + printf("\nStopping watchdog ticks failed (%d)...\n", errno); + else + printf("\nStopping watchdog ticks...\n"); + exit(0); +} + +static void usage(char *progname) +{ + printf("Usage: %s [options]\n", progname); + printf(" -b, --bootstatus Get last boot status (Watchdog/POR)\n"); + printf(" -d, --disable Turn off the watchdog timer\n"); + printf(" -e, --enable Turn on the watchdog timer\n"); + printf(" -h, --help Print the help message\n"); + printf(" -p, --pingrate=P Set ping rate to P seconds (default %d)\n", DEFAULT_PING_RATE); + printf(" -t, --timeout=T Set timeout to T seconds\n"); + printf("\n"); + printf("Parameters are parsed left-to-right in real-time.\n"); + printf("Example: %s -d -t 10 -p 5 -e\n", progname); } int main(int argc, char *argv[]) { - int flags; - unsigned int ping_rate = 1; - int ret; - int i; - - setbuf(stdout, NULL); - - fd = open("/dev/watchdog", O_WRONLY); - - if (fd == -1) { - printf("Watchdog device not enabled.\n"); - exit(-1); - } - - for (i = 1; i < argc; i++) { - if (!strncasecmp(argv[i], "-d", 2)) { - flags = WDIOS_DISABLECARD; - ret = ioctl(fd, WDIOC_SETOPTIONS, &flags); - if (!ret) - printf("Watchdog card disabled.\n"); - } else if (!strncasecmp(argv[i], "-e", 2)) { - flags = WDIOS_ENABLECARD; - ret = ioctl(fd, WDIOC_SETOPTIONS, &flags); - if (!ret) - printf("Watchdog card enabled.\n"); - } else if (!strncasecmp(argv[i], "-t", 2) && argv[2]) { - flags = atoi(argv[i + 1]); - ret = ioctl(fd, WDIOC_SETTIMEOUT, &flags); - if (!ret) - printf("Watchdog timeout set to %u seconds.\n", flags); - i++; - } else if (!strncasecmp(argv[i], "-p", 2) && argv[2]) { - ping_rate = strtoul(argv[i + 1], NULL, 0); - printf("Watchdog ping rate set to %u seconds.\n", ping_rate); - i++; - } else { - printf("-d to disable, -e to enable, -t <n> to set " - "the timeout,\n-p <n> to set the ping rate, and "); - printf("run by itself to tick the card.\n"); - printf("Parameters are parsed left-to-right in real-time.\n"); - printf("Example: %s -d -t 10 -p 5 -e\n", argv[0]); - goto end; - } - } - - printf("Watchdog Ticking Away!\n"); - - signal(SIGINT, term); - - while(1) { - keep_alive(); - sleep(ping_rate); - } + int flags; + unsigned int ping_rate = DEFAULT_PING_RATE; + int ret; + int c; + int oneshot = 0; + + setbuf(stdout, NULL); + + fd = open("/dev/watchdog", O_WRONLY); + + if (fd == -1) { + printf("Watchdog device not enabled.\n"); + exit(-1); + } + + while ((c = getopt_long(argc, argv, sopts, lopts, NULL)) != -1) { + switch (c) { + case 'b': + flags = 0; + oneshot = 1; + ret = ioctl(fd, WDIOC_GETBOOTSTATUS, &flags); + if (!ret) + printf("Last boot is caused by: %s.\n", (flags != 0) ? + "Watchdog" : "Power-On-Reset"); + else + printf("WDIOC_GETBOOTSTATUS errno '%s'\n", strerror(errno)); + break; + case 'd': + flags = WDIOS_DISABLECARD; + ret = ioctl(fd, WDIOC_SETOPTIONS, &flags); + if (!ret) + printf("Watchdog card disabled.\n"); + else + printf("WDIOS_DISABLECARD errno '%s'\n", strerror(errno)); + break; + case 'e': + flags = WDIOS_ENABLECARD; + ret = ioctl(fd, WDIOC_SETOPTIONS, &flags); + if (!ret) + printf("Watchdog card enabled.\n"); + else + printf("WDIOS_ENABLECARD errno '%s'\n", strerror(errno)); + break; + case 'p': + ping_rate = strtoul(optarg, NULL, 0); + if (!ping_rate) + ping_rate = DEFAULT_PING_RATE; + printf("Watchdog ping rate set to %u seconds.\n", ping_rate); + break; + case 't': + flags = strtoul(optarg, NULL, 0); + ret = ioctl(fd, WDIOC_SETTIMEOUT, &flags); + if (!ret) + printf("Watchdog timeout set to %u seconds.\n", flags); + else + printf("WDIOC_SETTIMEOUT errno '%s'\n", strerror(errno)); + break; + default: + usage(argv[0]); + goto end; + } + } + + if (oneshot) + goto end; + + printf("Watchdog Ticking Away!\n"); + + signal(SIGINT, term); + + while (1) { + keep_alive(); + sleep(ping_rate); + } end: - ret = write(fd, &v, 1); - if (ret < 0) - printf("Stopping watchdog ticks failed (%d)...\n", errno); - close(fd); - return 0; + ret = write(fd, &v, 1); + if (ret < 0) + printf("Stopping watchdog ticks failed (%d)...\n", errno); + close(fd); + return 0; } diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 97f187e2663f..0a74a20ca32b 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -20,7 +20,7 @@ BINARIES_64 := $(TARGETS_C_64BIT_ALL:%=%_64) BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32)) BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64)) -CFLAGS := -O2 -g -std=gnu99 -pthread -Wall +CFLAGS := -O2 -g -std=gnu99 -pthread -Wall -no-pie UNAME_M := $(shell uname -m) CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32) diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c index a8df159a8924..ec0f6b45ce8b 100644 --- a/tools/testing/selftests/x86/mpx-mini-test.c +++ b/tools/testing/selftests/x86/mpx-mini-test.c @@ -391,8 +391,7 @@ void handler(int signum, siginfo_t *si, void *vucontext) br_count++; dprintf1("#BR 0x%jx (total seen: %d)\n", status, br_count); -#define __SI_FAULT (3 << 16) -#define SEGV_BNDERR (__SI_FAULT|3) /* failed address bound checks */ +#define SEGV_BNDERR 3 /* failed address bound checks */ dprintf2("Saw a #BR! status 0x%jx at %016lx br_reason: %jx\n", status, ip, br_reason); diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c index 3237bc010e1c..23927845518d 100644 --- a/tools/testing/selftests/x86/protection_keys.c +++ b/tools/testing/selftests/x86/protection_keys.c @@ -212,19 +212,18 @@ void dump_mem(void *dumpme, int len_bytes) } } -#define __SI_FAULT (3 << 16) -#define SEGV_BNDERR (__SI_FAULT|3) /* failed address bound checks */ -#define SEGV_PKUERR (__SI_FAULT|4) +#define SEGV_BNDERR 3 /* failed address bound checks */ +#define SEGV_PKUERR 4 static char *si_code_str(int si_code) { - if (si_code & SEGV_MAPERR) + if (si_code == SEGV_MAPERR) return "SEGV_MAPERR"; - if (si_code & SEGV_ACCERR) + if (si_code == SEGV_ACCERR) return "SEGV_ACCERR"; - if (si_code & SEGV_BNDERR) + if (si_code == SEGV_BNDERR) return "SEGV_BNDERR"; - if (si_code & SEGV_PKUERR) + if (si_code == SEGV_PKUERR) return "SEGV_PKUERR"; return "UNKNOWN"; } |