181 files changed, 7230 insertions, 2272 deletions
diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h
index 69d09c39bbcd..cd7359e23d86 100644
--- a/tools/arch/s390/include/uapi/asm/kvm.h
+++ b/tools/arch/s390/include/uapi/asm/kvm.h
@@ -88,6 +88,12 @@ struct kvm_s390_io_adapter_req {
 /* kvm attributes for KVM_S390_VM_TOD */
 #define KVM_S390_VM_TOD_LOW		0
 #define KVM_S390_VM_TOD_HIGH		1
+#define KVM_S390_VM_TOD_EXT		2
+
+struct kvm_s390_vm_tod_clock {
+	__u8  epoch_idx;
+	__u64 tod;
+};
 
 /* kvm attributes for KVM_S390_VM_CPU_MODEL */
 /* processor related attributes are r/w */
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 8ea315a11fe0..2519c6c801c9 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -196,6 +196,7 @@
 
 #define X86_FEATURE_HW_PSTATE	( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+#define X86_FEATURE_SME		( 7*32+10) /* AMD Secure Memory Encryption */
 
 #define X86_FEATURE_INTEL_PPIN	( 7*32+14) /* Intel Processor Inventory Number */
 #define X86_FEATURE_INTEL_PT	( 7*32+15) /* Intel Processor Trace */
@@ -287,6 +288,7 @@
 #define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
 #define X86_FEATURE_AVIC	(15*32+13) /* Virtual Interrupt Controller */
 #define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
+#define X86_FEATURE_VGIF	(15*32+16) /* Virtual GIF */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
 #define X86_FEATURE_AVX512VBMI  (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index 5dff775af7cd..c10c9128f54e 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -21,11 +21,13 @@
 # define DISABLE_K6_MTRR	(1<<(X86_FEATURE_K6_MTRR & 31))
 # define DISABLE_CYRIX_ARR	(1<<(X86_FEATURE_CYRIX_ARR & 31))
 # define DISABLE_CENTAUR_MCR	(1<<(X86_FEATURE_CENTAUR_MCR & 31))
+# define DISABLE_PCID		0
 #else
 # define DISABLE_VME		0
 # define DISABLE_K6_MTRR	0
 # define DISABLE_CYRIX_ARR	0
 # define DISABLE_CENTAUR_MCR	0
+# define DISABLE_PCID		(1<<(X86_FEATURE_PCID & 31))
 #endif /* CONFIG_X86_64 */
 
 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
@@ -49,7 +51,7 @@
 #define DISABLED_MASK1	0
 #define DISABLED_MASK2	0
 #define DISABLED_MASK3	(DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR)
-#define DISABLED_MASK4	0
+#define DISABLED_MASK4	(DISABLE_PCID)
 #define DISABLED_MASK5	0
 #define DISABLED_MASK6	0
 #define DISABLED_MASK7	0
diff --git a/tools/hv/bondvf.sh b/tools/hv/bondvf.sh
deleted file mode 100755
index 89b25068cd98..000000000000
--- a/tools/hv/bondvf.sh
+++ /dev/null
@@ -1,232 +0,0 @@
-#!/bin/bash
-
-# This example script creates bonding network devices based on synthetic NIC
-# (the virtual network adapter usually provided by Hyper-V) and the matching
-# VF NIC (SRIOV virtual function). So the synthetic NIC and VF NIC can
-# function as one network device, and fail over to the synthetic NIC if VF is
-# down.
-#
-# Usage:
-# - After configured vSwitch and vNIC with SRIOV, start Linux virtual
-#   machine (VM)
-# - Run this scripts on the VM. It will create configuration files in
-#   distro specific directory.
-# - Reboot the VM, so that the bonding config are enabled.
-#
-# The config files are DHCP by default. You may edit them if you need to change
-# to Static IP or change other settings.
-#
-
-sysdir=/sys/class/net
-netvsc_cls={f8615163-df3e-46c5-913f-f2d2f965ed0e}
-bondcnt=0
-
-# Detect Distro
-if [ -f /etc/redhat-release ];
-then
-	cfgdir=/etc/sysconfig/network-scripts
-	distro=redhat
-elif grep -q 'Ubuntu' /etc/issue
-then
-	cfgdir=/etc/network
-	distro=ubuntu
-elif grep -q 'SUSE' /etc/issue
-then
-	cfgdir=/etc/sysconfig/network
-	distro=suse
-else
-	echo "Unsupported Distro"
-	exit 1
-fi
-
-echo Detected Distro: $distro, or compatible
-
-# Get a list of ethernet names
-list_eth=(`cd $sysdir && ls -d */ | cut -d/ -f1 | grep -v bond`)
-eth_cnt=${#list_eth[@]}
-
-echo List of net devices:
-
-# Get the MAC addresses
-for (( i=0; i < $eth_cnt; i++ ))
-do
-	list_mac[$i]=`cat $sysdir/${list_eth[$i]}/address`
-	echo ${list_eth[$i]}, ${list_mac[$i]}
-done
-
-# Find NIC with matching MAC
-for (( i=0; i < $eth_cnt-1; i++ ))
-do
-	for (( j=i+1; j < $eth_cnt; j++ ))
-	do
-		if [ "${list_mac[$i]}" = "${list_mac[$j]}" ]
-		then
-			list_match[$i]=${list_eth[$j]}
-			break
-		fi
-	done
-done
-
-function create_eth_cfg_redhat {
-	local fn=$cfgdir/ifcfg-$1
-
-	rm -f $fn
-	echo DEVICE=$1 >>$fn
-	echo TYPE=Ethernet >>$fn
-	echo BOOTPROTO=none >>$fn
-	echo UUID=`uuidgen` >>$fn
-	echo ONBOOT=yes >>$fn
-	echo PEERDNS=yes >>$fn
-	echo IPV6INIT=yes >>$fn
-	echo MASTER=$2 >>$fn
-	echo SLAVE=yes >>$fn
-}
-
-function create_eth_cfg_pri_redhat {
-	create_eth_cfg_redhat $1 $2
-}
-
-function create_bond_cfg_redhat {
-	local fn=$cfgdir/ifcfg-$1
-
-	rm -f $fn
-	echo DEVICE=$1 >>$fn
-	echo TYPE=Bond >>$fn
-	echo BOOTPROTO=dhcp >>$fn
-	echo UUID=`uuidgen` >>$fn
-	echo ONBOOT=yes >>$fn
-	echo PEERDNS=yes >>$fn
-	echo IPV6INIT=yes >>$fn
-	echo BONDING_MASTER=yes >>$fn
-	echo BONDING_OPTS=\"mode=active-backup miimon=100 primary=$2\" >>$fn
-}
-
-function del_eth_cfg_ubuntu {
-	local mainfn=$cfgdir/interfaces
-	local fnlist=( $mainfn )
-
-	local dirlist=(`awk '/^[ \t]*source/{print $2}' $mainfn`)
-
-	local i
-	for i in "${dirlist[@]}"
-	do
-		fnlist+=(`ls $i 2>/dev/null`)
-	done
-
-	local tmpfl=$(mktemp)
-
-	local nic_start='^[ \t]*(auto|iface|mapping|allow-.*)[ \t]+'$1
-	local nic_end='^[ \t]*(auto|iface|mapping|allow-.*|source)'
-
-	local fn
-	for fn in "${fnlist[@]}"
-	do
-		awk "/$nic_end/{x=0} x{next} /$nic_start/{x=1;next} 1" \
-			$fn >$tmpfl
-
-		cp $tmpfl $fn
-	done
-
-	rm $tmpfl
-}
-
-function create_eth_cfg_ubuntu {
-	local fn=$cfgdir/interfaces
-
-	del_eth_cfg_ubuntu $1
-	echo $'\n'auto $1 >>$fn
-	echo iface $1 inet manual >>$fn
-	echo bond-master $2 >>$fn
-}
-
-function create_eth_cfg_pri_ubuntu {
-	local fn=$cfgdir/interfaces
-
-	del_eth_cfg_ubuntu $1
-	echo $'\n'allow-hotplug $1 >>$fn
-	echo iface $1 inet manual >>$fn
-	echo bond-master $2 >>$fn
-	echo bond-primary $1 >>$fn
-}
-
-function create_bond_cfg_ubuntu {
-	local fn=$cfgdir/interfaces
-
-	del_eth_cfg_ubuntu $1
-
-	echo $'\n'auto $1 >>$fn
-	echo iface $1 inet dhcp >>$fn
-	echo bond-mode active-backup >>$fn
-	echo bond-miimon 100 >>$fn
-	echo bond-slaves none >>$fn
-}
-
-function create_eth_cfg_suse {
-        local fn=$cfgdir/ifcfg-$1
-
-        rm -f $fn
-	echo BOOTPROTO=none >>$fn
-	echo STARTMODE=auto >>$fn
-}
-
-function create_eth_cfg_pri_suse {
-	local fn=$cfgdir/ifcfg-$1
-
-	rm -f $fn
-	echo BOOTPROTO=none >>$fn
-	echo STARTMODE=hotplug >>$fn
-}
-
-function create_bond_cfg_suse {
-	local fn=$cfgdir/ifcfg-$1
-
-	rm -f $fn
-	echo BOOTPROTO=dhcp >>$fn
-	echo STARTMODE=auto >>$fn
-	echo BONDING_MASTER=yes >>$fn
-	echo BONDING_SLAVE_0=$2 >>$fn
-	echo BONDING_SLAVE_1=$3 >>$fn
-	echo BONDING_MODULE_OPTS=\'mode=active-backup miimon=100 primary=$2\' >>$fn
-}
-
-function create_bond {
-	local bondname=bond$bondcnt
-	local primary
-	local secondary
-
-	local class_id1=`cat $sysdir/$1/device/class_id 2>/dev/null`
-	local class_id2=`cat $sysdir/$2/device/class_id 2>/dev/null`
-
-	if [ "$class_id1" = "$netvsc_cls" ]
-	then
-		primary=$2
-		secondary=$1
-	elif [ "$class_id2" = "$netvsc_cls" ]
-	then
-		primary=$1
-		secondary=$2
-	else
-		return 0
-	fi
-
-	echo $'\nBond name:' $bondname
-
-	echo configuring $primary
-	create_eth_cfg_pri_$distro $primary $bondname
-
-	echo configuring $secondary
-	create_eth_cfg_$distro $secondary $bondname
-
-	echo creating: $bondname with primary slave: $primary
-	create_bond_cfg_$distro $bondname $primary $secondary
-
-	let bondcnt=bondcnt+1
-}
-
-for (( i=0; i < $eth_cnt-1; i++ ))
-do
-        if [ -n "${list_match[$i]}" ]
-        then
-		create_bond ${list_eth[$i]} ${list_match[$i]}
-        fi
-done
diff --git a/tools/include/asm-generic/hugetlb_encode.h b/tools/include/asm-generic/hugetlb_encode.h
new file mode 100644
index 000000000000..e4732d3c2998
--- /dev/null
+++ b/tools/include/asm-generic/hugetlb_encode.h
@@ -0,0 +1,34 @@
+#ifndef _ASM_GENERIC_HUGETLB_ENCODE_H_
+#define _ASM_GENERIC_HUGETLB_ENCODE_H_
+
+/*
+ * Several system calls take a flag to request "hugetlb" huge pages.
+ * Without further specification, these system calls will use the
+ * system's default huge page size.  If a system supports multiple
+ * huge page sizes, the desired huge page size can be specified in
+ * bits [26:31] of the flag arguments.  The value in these 6 bits
+ * will encode the log2 of the huge page size.
+ *
+ * The following definitions are associated with this huge page size
+ * encoding in flag arguments.  System call specific header files
+ * that use this encoding should include this file.  They can then
+ * provide definitions based on these with their own specific prefix.
+ * for example:
+ * #define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT
+ */
+
+#define HUGETLB_FLAG_ENCODE_SHIFT	26
+#define HUGETLB_FLAG_ENCODE_MASK	0x3f
+
+#define HUGETLB_FLAG_ENCODE_64KB	(16 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_512KB	(19 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_1MB		(20 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_2MB		(21 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_8MB		(23 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_16MB	(24 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_256MB	(28 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_1GB		(30 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_2GB		(31 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_16GB	(34 << HUGETLB_FLAG_ENCODE_SHIFT)
+
+#endif /* _ASM_GENERIC_HUGETLB_ENCODE_H_ */
diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h
index bd39b2090ad1..3723b9f8f964 100644
--- a/tools/include/linux/compiler-gcc.h
+++ b/tools/include/linux/compiler-gcc.h
@@ -21,11 +21,14 @@
 #define __must_be_array(a)	BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
 
 #define  noinline	__attribute__((noinline))
-
+#ifndef __packed
 #define __packed	__attribute__((packed))
-
+#endif
+#ifndef __noreturn
 #define __noreturn	__attribute__((noreturn))
-
+#endif
+#ifndef __aligned
 #define __aligned(x)	__attribute__((aligned(x)))
+#endif
 #define __printf(a, b)	__attribute__((format(printf, a, b)))
 #define __scanf(a, b)	__attribute__((format(scanf, a, b)))
diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h
index 8c27db0c5c08..203268f9231e 100644
--- a/tools/include/uapi/asm-generic/mman-common.h
+++ b/tools/include/uapi/asm-generic/mman-common.h
@@ -58,20 +58,12 @@
 					   overrides the coredump filter bits */
 #define MADV_DODUMP	17		/* Clear the MADV_DONTDUMP flag */
 
+#define MADV_WIPEONFORK 18		/* Zero memory on fork, child only */
+#define MADV_KEEPONFORK 19		/* Undo MADV_WIPEONFORK */
+
 /* compatibility flags */
 #define MAP_FILE	0
 
-/*
- * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size.
- * This gives us 6 bits, which is enough until someone invents 128 bit address
- * spaces.
- *
- * Assume these are all power of twos.
- * When 0 use the default page size.
- */
-#define MAP_HUGE_SHIFT	26
-#define MAP_HUGE_MASK	0x3f
-
 #define PKEY_DISABLE_ACCESS	0x1
 #define PKEY_DISABLE_WRITE	0x2
 #define PKEY_ACCESS_MASK	(PKEY_DISABLE_ACCESS |\
diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h
index 101593ab10ac..97677cd6964d 100644
--- a/tools/include/uapi/drm/drm.h
+++ b/tools/include/uapi/drm/drm.h
@@ -700,6 +700,7 @@ struct drm_prime_handle {
 
 struct drm_syncobj_create {
 	__u32 handle;
+#define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0)
 	__u32 flags;
 };
 
@@ -718,6 +719,24 @@ struct drm_syncobj_handle {
 	__u32 pad;
 };
 
+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
+struct drm_syncobj_wait {
+	__u64 handles;
+	/* absolute timeout */
+	__s64 timeout_nsec;
+	__u32 count_handles;
+	__u32 flags;
+	__u32 first_signaled; /* only valid when not waiting all */
+	__u32 pad;
+};
+
+struct drm_syncobj_array {
+	__u64 handles;
+	__u32 count_handles;
+	__u32 pad;
+};
+
 #if defined(__cplusplus)
 }
 #endif
@@ -840,6 +859,9 @@ extern "C" {
 #define DRM_IOCTL_SYNCOBJ_DESTROY	DRM_IOWR(0xC0, struct drm_syncobj_destroy)
 #define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD	DRM_IOWR(0xC1, struct drm_syncobj_handle)
 #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE	DRM_IOWR(0xC2, struct drm_syncobj_handle)
+#define DRM_IOCTL_SYNCOBJ_WAIT		DRM_IOWR(0xC3, struct drm_syncobj_wait)
+#define DRM_IOCTL_SYNCOBJ_RESET		DRM_IOWR(0xC4, struct drm_syncobj_array)
+#define DRM_IOCTL_SYNCOBJ_SIGNAL	DRM_IOWR(0xC5, struct drm_syncobj_array)
 
 /**
  * Device specific ioctls should only be in their respective headers
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index 7ccbd6a2bbe0..6598fb76d2c2 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -260,6 +260,8 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_GEM_CONTEXT_GETPARAM	0x34
 #define DRM_I915_GEM_CONTEXT_SETPARAM	0x35
 #define DRM_I915_PERF_OPEN		0x36
+#define DRM_I915_PERF_ADD_CONFIG	0x37
+#define DRM_I915_PERF_REMOVE_CONFIG	0x38
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -315,6 +317,8 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_GETPARAM, struct drm_i915_gem_context_param)
 #define DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM	DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_SETPARAM, struct drm_i915_gem_context_param)
 #define DRM_IOCTL_I915_PERF_OPEN	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param)
+#define DRM_IOCTL_I915_PERF_ADD_CONFIG	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config)
+#define DRM_IOCTL_I915_PERF_REMOVE_CONFIG	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
  * on the security mechanisms provided by hardware.
@@ -431,6 +435,11 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_HAS_EXEC_BATCH_FIRST	 48
 
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of
+ * drm_i915_gem_exec_fence structures.  See I915_EXEC_FENCE_ARRAY.
+ */
+#define I915_PARAM_HAS_EXEC_FENCE_ARRAY  49
+
 typedef struct drm_i915_getparam {
 	__s32 param;
 	/*
@@ -812,6 +821,17 @@ struct drm_i915_gem_exec_object2 {
 	__u64 rsvd2;
 };
 
+struct drm_i915_gem_exec_fence {
+	/**
+	 * User's handle for a drm_syncobj to wait on or signal.
+	 */
+	__u32 handle;
+
+#define I915_EXEC_FENCE_WAIT            (1<<0)
+#define I915_EXEC_FENCE_SIGNAL          (1<<1)
+	__u32 flags;
+};
+
 struct drm_i915_gem_execbuffer2 {
 	/**
 	 * List of gem_exec_object2 structs
@@ -826,7 +846,11 @@ struct drm_i915_gem_execbuffer2 {
 	__u32 DR1;
 	__u32 DR4;
 	__u32 num_cliprects;
-	/** This is a struct drm_clip_rect *cliprects */
+	/**
+	 * This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY
+	 * is not set.  If I915_EXEC_FENCE_ARRAY is set, then this is a
+	 * struct drm_i915_gem_exec_fence *fences.
+	 */
 	__u64 cliprects_ptr;
 #define I915_EXEC_RING_MASK              (7<<0)
 #define I915_EXEC_DEFAULT                (0<<0)
@@ -927,7 +951,14 @@ struct drm_i915_gem_execbuffer2 {
  * element).
  */
 #define I915_EXEC_BATCH_FIRST		(1<<18)
-#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_BATCH_FIRST<<1))
+
+/* Setting I915_FENCE_ARRAY implies that num_cliprects and cliprects_ptr
+ * define an array of i915_gem_exec_fence structures which specify a set of
+ * dma fences to wait upon or signal.
+ */
+#define I915_EXEC_FENCE_ARRAY   (1<<19)
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1))
 
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
@@ -1467,6 +1498,22 @@ enum drm_i915_perf_record_type {
 	DRM_I915_PERF_RECORD_MAX /* non-ABI */
 };
 
+/**
+ * Structure to upload perf dynamic configuration into the kernel.
+ */
+struct drm_i915_perf_oa_config {
+	/** String formatted like "%08x-%04x-%04x-%04x-%012x" */
+	char uuid[36];
+
+	__u32 n_mux_regs;
+	__u32 n_boolean_regs;
+	__u32 n_flex_regs;
+
+	__u64 __user mux_regs_ptr;
+	__u64 __user boolean_regs_ptr;
+	__u64 __user flex_regs_ptr;
+};
+
 #if defined(__cplusplus)
 }
 #endif
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e99e3e6f8b37..43ab5c402f98 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -30,9 +30,14 @@
 #define BPF_FROM_LE	BPF_TO_LE
 #define BPF_FROM_BE	BPF_TO_BE
 
+/* jmp encodings */
 #define BPF_JNE		0x50	/* jump != */
+#define BPF_JLT		0xa0	/* LT is unsigned, '<' */
+#define BPF_JLE		0xb0	/* LE is unsigned, '<=' */
 #define BPF_JSGT	0x60	/* SGT is signed '>', GT in x86 */
 #define BPF_JSGE	0x70	/* SGE is signed '>=', GE in x86 */
+#define BPF_JSLT	0xc0	/* SLT is signed, '<' */
+#define BPF_JSLE	0xd0	/* SLE is signed, '<=' */
 #define BPF_CALL	0x80	/* function call */
 #define BPF_EXIT	0x90	/* function return */
 
@@ -104,6 +109,8 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_LPM_TRIE,
 	BPF_MAP_TYPE_ARRAY_OF_MAPS,
 	BPF_MAP_TYPE_HASH_OF_MAPS,
+	BPF_MAP_TYPE_DEVMAP,
+	BPF_MAP_TYPE_SOCKMAP,
 };
 
 enum bpf_prog_type {
@@ -121,6 +128,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_LWT_OUT,
 	BPF_PROG_TYPE_LWT_XMIT,
 	BPF_PROG_TYPE_SOCK_OPS,
+	BPF_PROG_TYPE_SK_SKB,
 };
 
 enum bpf_attach_type {
@@ -128,6 +136,8 @@ enum bpf_attach_type {
 	BPF_CGROUP_INET_EGRESS,
 	BPF_CGROUP_INET_SOCK_CREATE,
 	BPF_CGROUP_SOCK_OPS,
+	BPF_SK_SKB_STREAM_PARSER,
+	BPF_SK_SKB_STREAM_VERDICT,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -153,6 +163,7 @@ enum bpf_attach_type {
 #define BPF_NOEXIST	1 /* create new element if it didn't exist */
 #define BPF_EXIST	2 /* update existing element */
 
+/* flags for BPF_MAP_CREATE command */
 #define BPF_F_NO_PREALLOC	(1U << 0)
 /* Instead of having one common LRU list in the
  * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list
@@ -161,6 +172,8 @@ enum bpf_attach_type {
  * across different LRU lists.
  */
 #define BPF_F_NO_COMMON_LRU	(1U << 1)
+/* Specify numa node during map creation */
+#define BPF_F_NUMA_NODE		(1U << 2)
 
 union bpf_attr {
 	struct { /* anonymous struct used by BPF_MAP_CREATE command */
@@ -168,8 +181,13 @@ union bpf_attr {
 		__u32	key_size;	/* size of key in bytes */
 		__u32	value_size;	/* size of value in bytes */
 		__u32	max_entries;	/* max number of entries in a map */
-		__u32	map_flags;	/* prealloc or not */
+		__u32	map_flags;	/* BPF_MAP_CREATE related
+					 * flags defined above.
+					 */
 		__u32	inner_map_fd;	/* fd pointing to the inner map */
+		__u32	numa_node;	/* numa node (effective only if
+					 * BPF_F_NUMA_NODE is set).
+					 */
 	};
 
 	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -344,9 +362,20 @@ union bpf_attr {
  * int bpf_redirect(ifindex, flags)
  *     redirect to another netdev
  *     @ifindex: ifindex of the net device
- *     @flags: bit 0 - if set, redirect to ingress instead of egress
- *             other bits - reserved
- *     Return: TC_ACT_REDIRECT
+ *     @flags:
+ *	  cls_bpf:
+ *          bit 0 - if set, redirect to ingress instead of egress
+ *          other bits - reserved
+ *	  xdp_bpf:
+ *	    all bits - reserved
+ *     Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error
+ *	       xdp_bfp: XDP_REDIRECT on success or XDP_ABORT on error
+ * int bpf_redirect_map(map, key, flags)
+ *     redirect to endpoint in map
+ *     @map: pointer to dev map
+ *     @key: index in map to lookup
+ *     @flags: --
+ *     Return: XDP_REDIRECT on success or XDP_ABORT on error
  *
  * u32 bpf_get_route_realm(skb)
  *     retrieve a dst's tclassid
@@ -539,6 +568,20 @@ union bpf_attr {
  *     @mode: operation mode (enum bpf_adj_room_mode)
  *     @flags: reserved for future use
  *     Return: 0 on success or negative error code
+ *
+ * int bpf_sk_redirect_map(map, key, flags)
+ *     Redirect skb to a sock in map using key as a lookup key for the
+ *     sock in map.
+ *     @map: pointer to sockmap
+ *     @key: key to lookup sock in map
+ *     @flags: reserved for future use
+ *     Return: SK_REDIRECT
+ *
+ * int bpf_sock_map_update(skops, map, key, flags)
+ *	@skops: pointer to bpf_sock_ops
+ *	@map: pointer to sockmap to update
+ *	@key: key to insert/update sock in map
+ *	@flags: same flags as map update elem
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -591,7 +634,10 @@ union bpf_attr {
 	FN(get_socket_uid),		\
 	FN(set_hash),			\
 	FN(setsockopt),			\
-	FN(skb_adjust_room),
+	FN(skb_adjust_room),		\
+	FN(redirect_map),		\
+	FN(sk_redirect_map),		\
+	FN(sock_map_update),		\
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -668,6 +714,15 @@ struct __sk_buff {
 	__u32 data;
 	__u32 data_end;
 	__u32 napi_id;
+
+	/* accessed by BPF_PROG_TYPE_sk_skb types */
+	__u32 family;
+	__u32 remote_ip4;	/* Stored in network byte order */
+	__u32 local_ip4;	/* Stored in network byte order */
+	__u32 remote_ip6[4];	/* Stored in network byte order */
+	__u32 local_ip6[4];	/* Stored in network byte order */
+	__u32 remote_port;	/* Stored in network byte order */
+	__u32 local_port;	/* stored in host byte order */
 };
 
 struct bpf_tunnel_key {
@@ -703,20 +758,23 @@ struct bpf_sock {
 	__u32 family;
 	__u32 type;
 	__u32 protocol;
+	__u32 mark;
+	__u32 priority;
 };
 
 #define XDP_PACKET_HEADROOM 256
 
 /* User return codes for XDP prog type.
  * A valid XDP program must return one of these defined values. All other
- * return codes are reserved for future use. Unknown return codes will result
- * in packet drop.
+ * return codes are reserved for future use. Unknown return codes will
+ * result in packet drops and a warning via bpf_warn_invalid_xdp_action().
  */
 enum xdp_action {
 	XDP_ABORTED = 0,
 	XDP_DROP,
 	XDP_PASS,
 	XDP_TX,
+	XDP_REDIRECT,
 };
 
 /* user accessible metadata for XDP packet hook
@@ -727,6 +785,12 @@ struct xdp_md {
 	__u32 data_end;
 };
 
+enum sk_action {
+	SK_ABORTED = 0,
+	SK_DROP,
+	SK_REDIRECT,
+};
+
 #define BPF_TAG_SIZE	8
 
 struct bpf_prog_info {
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 6cd63c18708a..838887587411 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -711,7 +711,8 @@ struct kvm_ppc_one_seg_page_size {
 struct kvm_ppc_smmu_info {
 	__u64 flags;
 	__u32 slb_size;
-	__u32 pad;
+	__u16 data_keys;	/* # storage keys supported for data */
+	__u16 instr_keys;	/* # storage keys supported for instructions */
 	struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
 };
 
diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h
index 81d8edf11789..a937480d7cd3 100644
--- a/tools/include/uapi/linux/mman.h
+++ b/tools/include/uapi/linux/mman.h
@@ -1,7 +1,8 @@
 #ifndef _UAPI_LINUX_MMAN_H
 #define _UAPI_LINUX_MMAN_H
 
-#include <uapi/asm/mman.h>
+#include <asm/mman.h>
+#include <asm-generic/hugetlb_encode.h>
 
 #define MREMAP_MAYMOVE	1
 #define MREMAP_FIXED	2
@@ -10,4 +11,25 @@
 #define OVERCOMMIT_ALWAYS		1
 #define OVERCOMMIT_NEVER		2
 
+/*
+ * Huge page size encoding when MAP_HUGETLB is specified, and a huge page
+ * size other than the default is desired.  See hugetlb_encode.h.
+ * All known huge page size encodings are provided here.  It is the
+ * responsibility of the application to know which sizes are supported on
+ * the running system.  See mmap(2) man page for details.
+ */
+#define MAP_HUGE_SHIFT	HUGETLB_FLAG_ENCODE_SHIFT
+#define MAP_HUGE_MASK	HUGETLB_FLAG_ENCODE_MASK
+
+#define MAP_HUGE_64KB	HUGETLB_FLAG_ENCODE_64KB
+#define MAP_HUGE_512KB	HUGETLB_FLAG_ENCODE_512KB
+#define MAP_HUGE_1MB	HUGETLB_FLAG_ENCODE_1MB
+#define MAP_HUGE_2MB	HUGETLB_FLAG_ENCODE_2MB
+#define MAP_HUGE_8MB	HUGETLB_FLAG_ENCODE_8MB
+#define MAP_HUGE_16MB	HUGETLB_FLAG_ENCODE_16MB
+#define MAP_HUGE_256MB	HUGETLB_FLAG_ENCODE_256MB
+#define MAP_HUGE_1GB	HUGETLB_FLAG_ENCODE_1GB
+#define MAP_HUGE_2GB	HUGETLB_FLAG_ENCODE_2GB
+#define MAP_HUGE_16GB	HUGETLB_FLAG_ENCODE_16GB
+
 #endif /* _UAPI_LINUX_MMAN_H */
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 2a37ae925d85..140ae638cfd6 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -139,8 +139,9 @@ enum perf_event_sample_format {
 	PERF_SAMPLE_IDENTIFIER			= 1U << 16,
 	PERF_SAMPLE_TRANSACTION			= 1U << 17,
 	PERF_SAMPLE_REGS_INTR			= 1U << 18,
+	PERF_SAMPLE_PHYS_ADDR			= 1U << 19,
 
-	PERF_SAMPLE_MAX = 1U << 19,		/* non-ABI */
+	PERF_SAMPLE_MAX = 1U << 20,		/* non-ABI */
 };
 
 /*
@@ -814,6 +815,7 @@ enum perf_event_type {
 	 *	{ u64			transaction; } && PERF_SAMPLE_TRANSACTION
 	 *	{ u64			abi; # enum perf_sample_regs_abi
 	 *	  u64			regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR
+	 *	{ u64			phys_addr;} && PERF_SAMPLE_PHYS_ADDR
 	 * };
 	 */
 	PERF_RECORD_SAMPLE			= 9,
diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile
index 4563ba7ede6f..1e83e3c07448 100644
--- a/tools/lib/api/Makefile
+++ b/tools/lib/api/Makefile
@@ -17,13 +17,19 @@ MAKEFLAGS += --no-print-directory
 LIBFILE = $(OUTPUT)libapi.a
 
 CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
-CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -fPIC
+CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -U_FORTIFY_SOURCE -fPIC
 
+ifeq ($(DEBUG),0)
 ifeq ($(CC_NO_CLANG), 0)
   CFLAGS += -O3
 else
   CFLAGS += -O6
 endif
+endif
+
+ifeq ($(DEBUG),0)
+  CFLAGS += -D_FORTIFY_SOURCE
+endif
 
 # Treat warnings as errors unless directed not to
 ifneq ($(WERROR),0)
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 4ed0257dc1f3..d2441db34740 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -189,6 +189,10 @@ install_lib: all_cmd
 	$(call QUIET_INSTALL, $(LIB_FILE)) \
 		$(call do_install,$(LIB_FILE),$(libdir_SQ))
 
+install_headers:
+	$(call QUIET_INSTALL, headers) \
+		$(call do_install,bpf.h,$(prefix)/include/bpf,644)
+
 install: install_lib
 
 ### Cleaning rules
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index e5bbb090bf88..1d6907d379c9 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -57,8 +57,9 @@ static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
 	return syscall(__NR_bpf, cmd, attr, size);
 }
 
-int bpf_create_map(enum bpf_map_type map_type, int key_size,
-		   int value_size, int max_entries, __u32 map_flags)
+int bpf_create_map_node(enum bpf_map_type map_type, int key_size,
+			int value_size, int max_entries, __u32 map_flags,
+			int node)
 {
 	union bpf_attr attr;
 
@@ -69,12 +70,24 @@ int bpf_create_map(enum bpf_map_type map_type, int key_size,
 	attr.value_size = value_size;
 	attr.max_entries = max_entries;
 	attr.map_flags = map_flags;
+	if (node >= 0) {
+		attr.map_flags |= BPF_F_NUMA_NODE;
+		attr.numa_node = node;
+	}
 
 	return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
 }
 
-int bpf_create_map_in_map(enum bpf_map_type map_type, int key_size,
-			  int inner_map_fd, int max_entries, __u32 map_flags)
+int bpf_create_map(enum bpf_map_type map_type, int key_size,
+		   int value_size, int max_entries, __u32 map_flags)
+{
+	return bpf_create_map_node(map_type, key_size, value_size,
+				   max_entries, map_flags, -1);
+}
+
+int bpf_create_map_in_map_node(enum bpf_map_type map_type, int key_size,
+			       int inner_map_fd, int max_entries,
+			       __u32 map_flags, int node)
 {
 	union bpf_attr attr;
 
@@ -86,10 +99,21 @@ int bpf_create_map_in_map(enum bpf_map_type map_type, int key_size,
 	attr.inner_map_fd = inner_map_fd;
 	attr.max_entries = max_entries;
 	attr.map_flags = map_flags;
+	if (node >= 0) {
+		attr.map_flags |= BPF_F_NUMA_NODE;
+		attr.numa_node = node;
+	}
 
 	return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
 }
 
+int bpf_create_map_in_map(enum bpf_map_type map_type, int key_size,
+			  int inner_map_fd, int max_entries, __u32 map_flags)
+{
+	return bpf_create_map_in_map_node(map_type, key_size, inner_map_fd,
+					  max_entries, map_flags, -1);
+}
+
 int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
 		     size_t insns_cnt, const char *license,
 		     __u32 kern_version, char *log_buf, size_t log_buf_sz)
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 418c86e69bcb..b8ea5843c39e 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -24,8 +24,14 @@
 #include <linux/bpf.h>
 #include <stddef.h>
 
+int bpf_create_map_node(enum bpf_map_type map_type, int key_size,
+			int value_size, int max_entries, __u32 map_flags,
+			int node);
 int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size,
 		   int max_entries, __u32 map_flags);
+int bpf_create_map_in_map_node(enum bpf_map_type map_type, int key_size,
+			       int inner_map_fd, int max_entries,
+			       __u32 map_flags, int node);
 int bpf_create_map_in_map(enum bpf_map_type map_type, int key_size,
 			  int inner_map_fd, int max_entries, __u32 map_flags);
 
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 8c67a90dbd82..35f6dfcdc565 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1745,3 +1745,32 @@ long libbpf_get_error(const void *ptr)
 		return PTR_ERR(ptr);
 	return 0;
 }
+
+int bpf_prog_load(const char *file, enum bpf_prog_type type,
+		  struct bpf_object **pobj, int *prog_fd)
+{
+	struct bpf_program *prog;
+	struct bpf_object *obj;
+	int err;
+
+	obj = bpf_object__open(file);
+	if (IS_ERR(obj))
+		return -ENOENT;
+
+	prog = bpf_program__next(NULL, obj);
+	if (!prog) {
+		bpf_object__close(obj);
+		return -ENOENT;
+	}
+
+	bpf_program__set_type(prog, type);
+	err = bpf_object__load(obj);
+	if (err) {
+		bpf_object__close(obj);
+		return -EINVAL;
+	}
+
+	*pobj = obj;
+	*prog_fd = bpf_program__fd(prog);
+	return 0;
+}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 32c7252f734e..7959086eb9c9 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -243,4 +243,6 @@ int bpf_map__pin(struct bpf_map *map, const char *path);
 
 long libbpf_get_error(const void *ptr);
 
+int bpf_prog_load(const char *file, enum bpf_prog_type type,
+		  struct bpf_object **pobj, int *prog_fd);
 #endif
diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt
index 6a1af43862df..3995735a878f 100644
--- a/tools/objtool/Documentation/stack-validation.txt
+++ b/tools/objtool/Documentation/stack-validation.txt
@@ -194,10 +194,10 @@ they mean, and suggestions for how to fix them.
    If it's a GCC-compiled .c file, the error may be because the function
    uses an inline asm() statement which has a "call" instruction.  An
    asm() statement with a call instruction must declare the use of the
-   stack pointer in its output operand.  For example, on x86_64:
+   stack pointer in its output operand.  On x86_64, this means adding
+   the ASM_CALL_CONSTRAINT as an output constraint:
 
-     register void *__sp asm("rsp");
-     asm volatile("call func" : "+r" (__sp));
+     asm volatile("call func" : ASM_CALL_CONSTRAINT);
 
    Otherwise the stack frame may not get created before the call.
 
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 0e8c8ec4fd4e..34a579f806e3 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -208,14 +208,14 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
 		break;
 
 	case 0x89:
-		if (rex == 0x48 && modrm == 0xe5) {
+		if (rex_w && !rex_r && modrm_mod == 3 && modrm_reg == 4) {
 
-			/* mov %rsp, %rbp */
+			/* mov %rsp, reg */
 			*type = INSN_STACK;
 			op->src.type = OP_SRC_REG;
 			op->src.reg = CFI_SP;
 			op->dest.type = OP_DEST_REG;
-			op->dest.reg = CFI_BP;
+			op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
 			break;
 		}
 
@@ -284,11 +284,16 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
 	case 0x8d:
 		if (sib == 0x24 && rex_w && !rex_b && !rex_x) {
 
-			/* lea disp(%rsp), reg */
 			*type = INSN_STACK;
-			op->src.type = OP_SRC_ADD;
+			if (!insn.displacement.value) {
+				/* lea (%rsp), reg */
+				op->src.type = OP_SRC_REG;
+			} else {
+				/* lea disp(%rsp), reg */
+				op->src.type = OP_SRC_ADD;
+				op->src.offset = insn.displacement.value;
+			}
 			op->src.reg = CFI_SP;
-			op->src.offset = insn.displacement.value;
 			op->dest.type = OP_DEST_REG;
 			op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
 
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index f744617c9946..a0c518ecf085 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1203,24 +1203,39 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
 		switch (op->src.type) {
 
 		case OP_SRC_REG:
-			if (op->src.reg == CFI_SP && op->dest.reg == CFI_BP) {
+			if (op->src.reg == CFI_SP && op->dest.reg == CFI_BP &&
+			    cfa->base == CFI_SP &&
+			    regs[CFI_BP].base == CFI_CFA &&
+			    regs[CFI_BP].offset == -cfa->offset) {
+
+				/* mov %rsp, %rbp */
+				cfa->base = op->dest.reg;
+				state->bp_scratch = false;
+			}
 
-				if (cfa->base == CFI_SP &&
-				    regs[CFI_BP].base == CFI_CFA &&
-				    regs[CFI_BP].offset == -cfa->offset) {
+			else if (op->src.reg == CFI_SP &&
+				 op->dest.reg == CFI_BP && state->drap) {
 
-					/* mov %rsp, %rbp */
-					cfa->base = op->dest.reg;
-					state->bp_scratch = false;
-				}
+				/* drap: mov %rsp, %rbp */
+				regs[CFI_BP].base = CFI_BP;
+				regs[CFI_BP].offset = -state->stack_size;
+				state->bp_scratch = false;
+			}
 
-				else if (state->drap) {
+			else if (op->src.reg == CFI_SP && cfa->base == CFI_SP) {
 
-					/* drap: mov %rsp, %rbp */
-					regs[CFI_BP].base = CFI_BP;
-					regs[CFI_BP].offset = -state->stack_size;
-					state->bp_scratch = false;
-				}
+				/*
+				 * mov %rsp, %reg
+				 *
+				 * This is needed for the rare case where GCC
+				 * does:
+				 *
+				 *   mov    %rsp, %rax
+				 *   ...
+				 *   mov    %rax, %rsp
+				 */
+				state->vals[op->dest.reg].base = CFI_CFA;
+				state->vals[op->dest.reg].offset = -state->stack_size;
 			}
 
 			else if (op->dest.reg == cfa->base) {
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 6e9f980a7d26..24460155c82c 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -175,19 +175,20 @@ static int read_sections(struct elf *elf)
 			return -1;
 		}
 
-		sec->data = elf_getdata(s, NULL);
-		if (!sec->data) {
-			WARN_ELF("elf_getdata");
-			return -1;
-		}
-
-		if (sec->data->d_off != 0 ||
-		    sec->data->d_size != sec->sh.sh_size) {
-			WARN("unexpected data attributes for %s", sec->name);
-			return -1;
+		if (sec->sh.sh_size != 0) {
+			sec->data = elf_getdata(s, NULL);
+			if (!sec->data) {
+				WARN_ELF("elf_getdata");
+				return -1;
+			}
+			if (sec->data->d_off != 0 ||
+			    sec->data->d_size != sec->sh.sh_size) {
+				WARN("unexpected data attributes for %s",
+				     sec->name);
+				return -1;
+			}
 		}
-
-		sec->len = sec->data->d_size;
+		sec->len = sec->sh.sh_size;
 	}
 
 	/* sanity check, one more call to elf_nextscn() should return NULL */
@@ -508,6 +509,7 @@ struct section *elf_create_rela_section(struct elf *elf, struct section *base)
 	strcat(relaname, base->name);
 
 	sec = elf_create_section(elf, relaname, sizeof(GElf_Rela), 0);
+	free(relaname);
 	if (!sec)
 		return NULL;
 
@@ -561,6 +563,7 @@ int elf_write(struct elf *elf)
 	struct section *sec;
 	Elf_Scn *s;
 
+	/* Update section headers for changed sections: */
 	list_for_each_entry(sec, &elf->sections, list) {
 		if (sec->changed) {
 			s = elf_getscn(elf->elf, sec->idx);
@@ -568,13 +571,17 @@ int elf_write(struct elf *elf)
 				WARN_ELF("elf_getscn");
 				return -1;
 			}
-			if (!gelf_update_shdr (s, &sec->sh)) {
+			if (!gelf_update_shdr(s, &sec->sh)) {
 				WARN_ELF("gelf_update_shdr");
 				return -1;
 			}
 		}
 	}
 
+	/* Make sure the new section header entries get updated properly. */
+	elf_flagelf(elf->elf, ELF_C_SET, ELF_F_DIRTY);
+
+	/* Write all changes to the file. */
 	if (elf_update(elf->elf, ELF_C_WRITE) < 0) {
 		WARN_ELF("elf_update");
 		return -1;
diff --git a/tools/pci/pcitest.c b/tools/pci/pcitest.c
index ad54a58d7dda..9074b477bff0 100644
--- a/tools/pci/pcitest.c
+++ b/tools/pci/pcitest.c
@@ -173,6 +173,7 @@ usage:
 			"\t-D <dev>		PCI endpoint test device {default: /dev/pci-endpoint-test.0}\n"
 			"\t-b <bar num>		BAR test (bar number between 0..5)\n"
 			"\t-m <msi num>		MSI test (msi number between 1..32)\n"
+			"\t-l			Legacy IRQ test\n"
 			"\t-r			Read buffer test\n"
 			"\t-w			Write buffer test\n"
 			"\t-c			Copy buffer test\n"
diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt
index ab1b0825130a..76971d2e4164 100644
--- a/tools/perf/Documentation/intel-pt.txt
+++ b/tools/perf/Documentation/intel-pt.txt
@@ -873,7 +873,7 @@ amended to take the number of elements as a parameter.
 
 	$ cat ~/.perfconfig
 	[intel-pt]
-		mispred-all
+		mispred-all = on
 
 	$ perf record -e intel_pt//u ./sort 3000
 	Bubble sorting array of 3000 elements
diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt
index 73496320fca3..4be08a1e3f8d 100644
--- a/tools/perf/Documentation/perf-mem.txt
+++ b/tools/perf/Documentation/perf-mem.txt
@@ -59,6 +59,10 @@ OPTIONS
 --ldload::
 	Specify desired latency for loads event.
 
+-p::
+--phys-data::
+	Record/Report sample physical addresses
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 9bdea047c5db..e397453e5a46 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -249,7 +249,10 @@ OPTIONS
 
 -d::
 --data::
-	Record the sample addresses.
+	Record the sample virtual addresses.
+
+--phys-data::
+	Record the sample physical addresses.
 
 -T::
 --timestamp::
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 9fa84617181e..383a98d992ed 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -137,6 +137,7 @@ OPTIONS
 	- mem: type of memory access for the data at the time of the sample
 	- snoop: type of snoop (if any) for the data at the time of the sample
 	- dcacheline: the cacheline the data address is on at the time of the sample
+	- phys_daddr: physical address of data being executed on at the time of sample
 
 	And the default sort keys are changed to local_weight, mem, sym, dso,
 	symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'.
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 5ee8796be96e..18dfcfa38454 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -117,7 +117,7 @@ OPTIONS
         Comma separated list of fields to print. Options are:
         comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
         srcline, period, iregs, brstack, brstacksym, flags, bpf-output, brstackinsn, brstackoff,
-        callindent, insn, insnlen, synth.
+        callindent, insn, insnlen, synth, phys_addr.
         Field list can be prepended with the type, trace, sw or hw,
         to indicate to which event type the field list applies.
         e.g., -F sw:comm,tid,time,ip,sym  and -F trace:time,cpu,trace
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index c1e3288a2dfb..d53bea6bd571 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -37,7 +37,7 @@ OPTIONS
 --expr::
 --event::
 	List of syscalls and other perf events (tracepoints, HW cache events,
-	etc) to show.
+	etc) to show. Globbing is supported, e.g.: "epoll_*", "*msg*", etc.
 	See 'perf list' for a complete list of events.
 	Prefixing with ! shows all syscalls but the ones specified.  You may
 	need to escape it.
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 62072822dc85..627b7cada144 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -1,34 +1,8 @@
 tools/perf
-tools/arch/alpha/include/asm/barrier.h
-tools/arch/arm/include/asm/barrier.h
-tools/arch/arm64/include/asm/barrier.h
-tools/arch/ia64/include/asm/barrier.h
-tools/arch/mips/include/asm/barrier.h
-tools/arch/powerpc/include/asm/barrier.h
-tools/arch/s390/include/asm/barrier.h
-tools/arch/sh/include/asm/barrier.h
-tools/arch/sparc/include/asm/barrier.h
-tools/arch/sparc/include/asm/barrier_32.h
-tools/arch/sparc/include/asm/barrier_64.h
-tools/arch/tile/include/asm/barrier.h
-tools/arch/x86/include/asm/barrier.h
-tools/arch/x86/include/asm/cmpxchg.h
-tools/arch/x86/include/asm/cpufeatures.h
-tools/arch/x86/include/asm/disabled-features.h
-tools/arch/x86/include/asm/required-features.h
-tools/arch/x86/include/uapi/asm/svm.h
-tools/arch/x86/include/uapi/asm/vmx.h
-tools/arch/x86/include/uapi/asm/kvm.h
-tools/arch/x86/include/uapi/asm/kvm_perf.h
-tools/arch/x86/lib/memcpy_64.S
-tools/arch/x86/lib/memset_64.S
-tools/arch/s390/include/uapi/asm/kvm_perf.h
-tools/arch/s390/include/uapi/asm/sie.h
-tools/arch/xtensa/include/asm/barrier.h
+tools/arch
 tools/scripts
 tools/build
-tools/arch/x86/include/asm/atomic.h
-tools/arch/x86/include/asm/rmwcc.h
+tools/include
 tools/lib/traceevent
 tools/lib/api
 tools/lib/bpf
@@ -42,60 +16,3 @@ tools/lib/find_bit.c
 tools/lib/bitmap.c
 tools/lib/str_error_r.c
 tools/lib/vsprintf.c
-tools/include/asm/alternative-asm.h
-tools/include/asm/atomic.h
-tools/include/asm/barrier.h
-tools/include/asm/bug.h
-tools/include/asm-generic/atomic-gcc.h
-tools/include/asm-generic/barrier.h
-tools/include/asm-generic/bitops/arch_hweight.h
-tools/include/asm-generic/bitops/atomic.h
-tools/include/asm-generic/bitops/const_hweight.h
-tools/include/asm-generic/bitops/__ffs.h
-tools/include/asm-generic/bitops/__ffz.h
-tools/include/asm-generic/bitops/__fls.h
-tools/include/asm-generic/bitops/find.h
-tools/include/asm-generic/bitops/fls64.h
-tools/include/asm-generic/bitops/fls.h
-tools/include/asm-generic/bitops/hweight.h
-tools/include/asm-generic/bitops.h
-tools/include/linux/atomic.h
-tools/include/linux/bitops.h
-tools/include/linux/compiler.h
-tools/include/linux/compiler-gcc.h
-tools/include/linux/coresight-pmu.h
-tools/include/linux/bug.h
-tools/include/linux/filter.h
-tools/include/linux/hash.h
-tools/include/linux/kernel.h
-tools/include/linux/list.h
-tools/include/linux/log2.h
-tools/include/uapi/asm-generic/fcntl.h
-tools/include/uapi/asm-generic/ioctls.h
-tools/include/uapi/asm-generic/mman-common.h
-tools/include/uapi/asm-generic/mman.h
-tools/include/uapi/drm/drm.h
-tools/include/uapi/drm/i915_drm.h
-tools/include/uapi/linux/bpf.h
-tools/include/uapi/linux/bpf_common.h
-tools/include/uapi/linux/fcntl.h
-tools/include/uapi/linux/hw_breakpoint.h
-tools/include/uapi/linux/kvm.h
-tools/include/uapi/linux/mman.h
-tools/include/uapi/linux/perf_event.h
-tools/include/uapi/linux/sched.h
-tools/include/uapi/linux/stat.h
-tools/include/uapi/linux/vhost.h
-tools/include/uapi/sound/asound.h
-tools/include/linux/poison.h
-tools/include/linux/rbtree.h
-tools/include/linux/rbtree_augmented.h
-tools/include/linux/refcount.h
-tools/include/linux/string.h
-tools/include/linux/stringify.h
-tools/include/linux/types.h
-tools/include/linux/err.h
-tools/include/linux/bitmap.h
-tools/include/linux/time64.h
-tools/arch/*/include/uapi/asm/mman.h
-tools/arch/*/include/uapi/asm/perf_regs.h
diff --git a/tools/perf/arch/s390/util/Build b/tools/perf/arch/s390/util/Build
index bd518b623d7a..5bd7b9260cc0 100644
--- a/tools/perf/arch/s390/util/Build
+++ b/tools/perf/arch/s390/util/Build
@@ -1,5 +1,4 @@
 libperf-y += header.o
-libperf-y += sym-handling.o
 libperf-y += kvm-stat.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
diff --git a/tools/perf/arch/s390/util/sym-handling.c b/tools/perf/arch/s390/util/sym-handling.c
deleted file mode 100644
index e103f6e46afe..000000000000
--- a/tools/perf/arch/s390/util/sym-handling.c
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Architecture specific ELF symbol handling and relocation mapping.
- *
- * Copyright 2017 IBM Corp.
- * Author(s): Thomas Richter <tmricht@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License (version 2 only)
- * as published by the Free Software Foundation.
- */
-
-#include "symbol.h"
-
-#ifdef HAVE_LIBELF_SUPPORT
-bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
-{
-	if (ehdr.e_type == ET_EXEC)
-		return false;
-	return ehdr.e_type == ET_REL || ehdr.e_type == ET_DYN;
-}
-
-void arch__adjust_sym_map_offset(GElf_Sym *sym,
-				 GElf_Shdr *shdr __maybe_unused,
-				 struct map *map)
-{
-	if (map->type == MAP__FUNCTION)
-		sym->st_value += map->start;
-}
-#endif
diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c
index 3ddcc6e2abeb..a1d82e33282c 100644
--- a/tools/perf/builtin-config.c
+++ b/tools/perf/builtin-config.c
@@ -59,7 +59,7 @@ static int set_config(struct perf_config_set *set, const char *file_name,
 		fprintf(fp, "[%s]\n", section->name);
 
 		perf_config_items__for_each_entry(&section->items, item) {
-			if (!use_system_config && section->from_system_config)
+			if (!use_system_config && item->from_system_config)
 				continue;
 			if (item->value)
 				fprintf(fp, "\t%s = %s\n",
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index a1497c516d85..24ee68ecdd42 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -627,7 +627,6 @@ static const struct {
 	{ "GFP_HIGHUSER_MOVABLE",	"HUM" },
 	{ "GFP_HIGHUSER",		"HU" },
 	{ "GFP_USER",			"U" },
-	{ "GFP_TEMPORARY",		"TMP" },
 	{ "GFP_KERNEL_ACCOUNT",		"KAC" },
 	{ "GFP_KERNEL",			"K" },
 	{ "GFP_NOFS",			"NF" },
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index e001c0290793..0f15634ef82c 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -23,6 +23,7 @@ struct perf_mem {
 	bool			hide_unresolved;
 	bool			dump_raw;
 	bool			force;
+	bool			phys_addr;
 	int			operation;
 	const char		*cpu_list;
 	DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
@@ -101,6 +102,9 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
 
 	rec_argv[i++] = "-d";
 
+	if (mem->phys_addr)
+		rec_argv[i++] = "--phys-data";
+
 	for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
 		if (!perf_mem_events[j].record)
 			continue;
@@ -161,30 +165,60 @@ dump_raw_samples(struct perf_tool *tool,
 	if (al.map != NULL)
 		al.map->dso->hit = 1;
 
-	if (symbol_conf.field_sep) {
-		fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s%"PRIu64
-		      "%s0x%"PRIx64"%s%s:%s\n";
+	if (mem->phys_addr) {
+		if (symbol_conf.field_sep) {
+			fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s0x%016"PRIx64
+			      "%s%"PRIu64"%s0x%"PRIx64"%s%s:%s\n";
+		} else {
+			fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64
+			      "%s0x%016"PRIx64"%s%5"PRIu64"%s0x%06"PRIx64
+			      "%s%s:%s\n";
+			symbol_conf.field_sep = " ";
+		}
+
+		printf(fmt,
+			sample->pid,
+			symbol_conf.field_sep,
+			sample->tid,
+			symbol_conf.field_sep,
+			sample->ip,
+			symbol_conf.field_sep,
+			sample->addr,
+			symbol_conf.field_sep,
+			sample->phys_addr,
+			symbol_conf.field_sep,
+			sample->weight,
+			symbol_conf.field_sep,
+			sample->data_src,
+			symbol_conf.field_sep,
+			al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???",
+			al.sym ? al.sym->name : "???");
 	} else {
-		fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64
-		      "%s%5"PRIu64"%s0x%06"PRIx64"%s%s:%s\n";
-		symbol_conf.field_sep = " ";
-	}
+		if (symbol_conf.field_sep) {
+			fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s%"PRIu64
+			      "%s0x%"PRIx64"%s%s:%s\n";
+		} else {
+			fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64
+			      "%s%5"PRIu64"%s0x%06"PRIx64"%s%s:%s\n";
+			symbol_conf.field_sep = " ";
+		}
 
-	printf(fmt,
-		sample->pid,
-		symbol_conf.field_sep,
-		sample->tid,
-		symbol_conf.field_sep,
-		sample->ip,
-		symbol_conf.field_sep,
-		sample->addr,
-		symbol_conf.field_sep,
-		sample->weight,
-		symbol_conf.field_sep,
-		sample->data_src,
-		symbol_conf.field_sep,
-		al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???",
-		al.sym ? al.sym->name : "???");
+		printf(fmt,
+			sample->pid,
+			symbol_conf.field_sep,
+			sample->tid,
+			symbol_conf.field_sep,
+			sample->ip,
+			symbol_conf.field_sep,
+			sample->addr,
+			symbol_conf.field_sep,
+			sample->weight,
+			symbol_conf.field_sep,
+			sample->data_src,
+			symbol_conf.field_sep,
+			al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???",
+			al.sym ? al.sym->name : "???");
+	}
 out_put:
 	addr_location__put(&al);
 	return 0;
@@ -224,7 +258,10 @@ static int report_raw_events(struct perf_mem *mem)
 	if (ret < 0)
 		goto out_delete;
 
-	printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n");
+	if (mem->phys_addr)
+		printf("# PID, TID, IP, ADDR, PHYS ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n");
+	else
+		printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n");
 
 	ret = perf_session__process_events(session);
 
@@ -254,9 +291,16 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem)
 	 * there is no weight (cost) associated with stores, so don't print
 	 * the column
 	 */
-	if (!(mem->operation & MEM_OPERATION_LOAD))
-		rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr,"
-				"dso_daddr,tlb,locked";
+	if (!(mem->operation & MEM_OPERATION_LOAD)) {
+		if (mem->phys_addr)
+			rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr,"
+					"dso_daddr,tlb,locked,phys_daddr";
+		else
+			rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr,"
+					"dso_daddr,tlb,locked";
+	} else if (mem->phys_addr)
+		rep_argv[i++] = "--sort=local_weight,mem,sym,dso,symbol_daddr,"
+				"dso_daddr,snoop,tlb,locked,phys_daddr";
 
 	for (j = 1; j < argc; j++, i++)
 		rep_argv[i] = argv[j];
@@ -373,6 +417,7 @@ int cmd_mem(int argc, const char **argv)
 		   "separator for columns, no spaces will be added"
 		   " between columns '.' is reserved."),
 	OPT_BOOLEAN('f', "force", &mem.force, "don't complain, do it"),
+	OPT_BOOLEAN('p', "phys-data", &mem.phys_addr, "Record/Report sample physical addresses"),
 	OPT_END()
 	};
 	const char *const mem_subcommands[] = { "record", "report", NULL };
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 36d7117a7562..56f8142ff97f 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1604,6 +1604,8 @@ static struct option __record_options[] = {
 	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
 		    "per thread counts"),
 	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
+	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
+		    "Record the sample physical addresses"),
 	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
 	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
 			&record.opts.sample_time_set,
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 378f76cdf923..3d4c3b5e1868 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -87,6 +87,7 @@ enum perf_output_field {
 	PERF_OUTPUT_BRSTACKINSN	    = 1U << 23,
 	PERF_OUTPUT_BRSTACKOFF	    = 1U << 24,
 	PERF_OUTPUT_SYNTH           = 1U << 25,
+	PERF_OUTPUT_PHYS_ADDR       = 1U << 26,
 };
 
 struct output_option {
@@ -119,6 +120,7 @@ struct output_option {
 	{.str = "brstackinsn", .field = PERF_OUTPUT_BRSTACKINSN},
 	{.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF},
 	{.str = "synth", .field = PERF_OUTPUT_SYNTH},
+	{.str = "phys_addr", .field = PERF_OUTPUT_PHYS_ADDR},
 };
 
 enum {
@@ -175,7 +177,8 @@ static struct {
 			      PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
 			      PERF_OUTPUT_SYM | PERF_OUTPUT_DSO |
 			      PERF_OUTPUT_PERIOD |  PERF_OUTPUT_ADDR |
-			      PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT,
+			      PERF_OUTPUT_DATA_SRC | PERF_OUTPUT_WEIGHT |
+			      PERF_OUTPUT_PHYS_ADDR,
 
 		.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
 	},
@@ -382,6 +385,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
 					PERF_OUTPUT_IREGS))
 		return -EINVAL;
 
+	if (PRINT_FIELD(PHYS_ADDR) &&
+		perf_evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR",
+					PERF_OUTPUT_PHYS_ADDR))
+		return -EINVAL;
+
 	return 0;
 }
 
@@ -1446,6 +1454,9 @@ static void process_event(struct perf_script *script,
 	if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
 		print_sample_bpf_output(sample);
 	print_insn(sample, attr, thread, machine);
+
+	if (PRINT_FIELD(PHYS_ADDR))
+		printf("%16" PRIx64, sample->phys_addr);
 	printf("\n");
 }
 
@@ -2729,7 +2740,7 @@ int cmd_script(int argc, const char **argv)
 		     "Valid types: hw,sw,trace,raw,synth. "
 		     "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
 		     "addr,symoff,period,iregs,brstack,brstacksym,flags,"
-		     "bpf-output,callindent,insn,insnlen,brstackinsn,synth",
+		     "bpf-output,callindent,insn,insnlen,brstackinsn,synth,phys_addr",
 		     parse_output_fields),
 	OPT_BOOLEAN('a', "all-cpus", &system_wide,
 		    "system-wide collection from all CPUs"),
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 866da7aa54bf..69523ed55894 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -707,7 +707,7 @@ try_again:
 				process_interval();
 			}
 		}
-		wait(&status);
+		waitpid(child_pid, &status, 0);
 
 		if (workload_exec_errno) {
 			const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
@@ -1257,7 +1257,7 @@ static bool collect_data(struct perf_evsel *counter,
 	if (counter->merged_stat)
 		return false;
 	cb(counter, data, true);
-	if (!no_merge)
+	if (!no_merge && counter->auto_merge_stats)
 		collect_all_aliases(counter, cb, data);
 	return true;
 }
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index d59cdadf3a79..771ddab94bb0 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1261,6 +1261,7 @@ static int trace__read_syscall_info(struct trace *trace, int id)
 static int trace__validate_ev_qualifier(struct trace *trace)
 {
 	int err = 0, i;
+	size_t nr_allocated;
 	struct str_node *pos;
 
 	trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
@@ -1274,13 +1275,18 @@ static int trace__validate_ev_qualifier(struct trace *trace)
 		goto out;
 	}
 
+	nr_allocated = trace->ev_qualifier_ids.nr;
 	i = 0;
 
 	strlist__for_each_entry(pos, trace->ev_qualifier) {
 		const char *sc = pos->s;
-		int id = syscalltbl__id(trace->sctbl, sc);
+		int id = syscalltbl__id(trace->sctbl, sc), match_next = -1;
 
 		if (id < 0) {
+			id = syscalltbl__strglobmatch_first(trace->sctbl, sc, &match_next);
+			if (id >= 0)
+				goto matches;
+
 			if (err == 0) {
 				fputs("Error:\tInvalid syscall ", trace->output);
 				err = -EINVAL;
@@ -1290,13 +1296,37 @@ static int trace__validate_ev_qualifier(struct trace *trace)
 
 			fputs(sc, trace->output);
 		}
-
+matches:
 		trace->ev_qualifier_ids.entries[i++] = id;
+		if (match_next == -1)
+			continue;
+
+		while (1) {
+			id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next);
+			if (id < 0)
+				break;
+			if (nr_allocated == trace->ev_qualifier_ids.nr) {
+				void *entries;
+
+				nr_allocated += 8;
+				entries = realloc(trace->ev_qualifier_ids.entries,
+						  nr_allocated * sizeof(trace->ev_qualifier_ids.entries[0]));
+				if (entries == NULL) {
+					err = -ENOMEM;
+					fputs("\nError:\t Not enough memory for parsing\n", trace->output);
+					goto out_free;
+				}
+				trace->ev_qualifier_ids.entries = entries;
+			}
+			trace->ev_qualifier_ids.nr++;
+			trace->ev_qualifier_ids.entries[i++] = id;
+		}
 	}
 
 	if (err < 0) {
 		fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
 		      "\nHint:\tand: 'man syscalls'\n", trace->output);
+out_free:
 		zfree(&trace->ev_qualifier_ids.entries);
 		trace->ev_qualifier_ids.nr = 0;
 	}
@@ -2814,7 +2844,7 @@ static int trace__parse_events_option(const struct option *opt, const char *str,
 	struct trace *trace = (struct trace *)opt->value;
 	const char *s = str;
 	char *sep = NULL, *lists[2] = { NULL, NULL, };
-	int len = strlen(str) + 1, err = -1, list;
+	int len = strlen(str) + 1, err = -1, list, idx;
 	char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
 	char group_name[PATH_MAX];
 
@@ -2831,7 +2861,8 @@ static int trace__parse_events_option(const struct option *opt, const char *str,
 			*sep = '\0';
 
 		list = 0;
-		if (syscalltbl__id(trace->sctbl, s) >= 0) {
+		if (syscalltbl__id(trace->sctbl, s) >= 0 ||
+		    syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) {
 			list = 1;
 		} else {
 			path__join(group_name, sizeof(group_name), strace_groups_dir, s);
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index e0279babe0c0..2f19e03c5c40 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -467,15 +467,21 @@ int main(int argc, const char **argv)
 	 *  - cannot execute it externally (since it would just do
 	 *    the same thing over again)
 	 *
-	 * So we just directly call the internal command handler, and
-	 * die if that one cannot handle it.
+	 * So we just directly call the internal command handler. If that one
+	 * fails to handle this, then maybe we just run a renamed perf binary
+	 * that contains a dash in its name. To handle this scenario, we just
+	 * fall through and ignore the "xxxx" part of the command string.
 	 */
 	if (strstarts(cmd, "perf-")) {
 		cmd += 5;
 		argv[0] = cmd;
 		handle_internal_command(argc, argv);
-		fprintf(stderr, "cannot handle %s internally", cmd);
-		goto out;
+		/*
+		 * If the command is handled, the above function does not
+		 * return undo changes and fall through in such a case.
+		 */
+		cmd -= 5;
+		argv[0] = cmd;
 	}
 	if (strstarts(cmd, "trace")) {
 #ifdef HAVE_LIBAUDIT_SUPPORT
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 2c010dd6a79d..dc442ba21bf6 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -43,6 +43,7 @@ struct record_opts {
 	bool	     no_samples;
 	bool	     raw_samples;
 	bool	     sample_address;
+	bool	     sample_phys_addr;
 	bool	     sample_weight;
 	bool	     sample_time;
 	bool	     sample_time_set;
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/frontend.json b/tools/perf/pmu-events/arch/powerpc/power9/frontend.json
index 7e62c46d7a20..c63a919eda98 100644
--- a/tools/perf/pmu-events/arch/powerpc/power9/frontend.json
+++ b/tools/perf/pmu-events/arch/powerpc/power9/frontend.json
@@ -80,11 +80,6 @@
     "BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load."
   },
   {,
-    "EventCode": "0x400F0",
-    "EventName": "PM_LD_MISS_L1",
-    "BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load."
-  },
-  {,
     "EventCode": "0x2E01A",
     "EventName": "PM_CMPLU_STALL_LSU_FLUSH_NEXT",
     "BriefDescription": "Completion stall of one cycle because the LSU requested to flush the next iop in the sequence. It takes 1 cycle for the ISU to process this request before the LSU instruction is allowed to complete"
@@ -374,4 +369,4 @@
     "EventName": "PM_IPTEG_FROM_L31_ECO_MOD",
     "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a instruction side request"
   }
-]
-\ No newline at end of file
+]
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/other.json b/tools/perf/pmu-events/arch/powerpc/power9/other.json
index 00f3d2a21f31..54cc3be00fc2 100644
--- a/tools/perf/pmu-events/arch/powerpc/power9/other.json
+++ b/tools/perf/pmu-events/arch/powerpc/power9/other.json
@@ -605,11 +605,6 @@
     "BriefDescription": "RC retries on PB for any load from core (excludes DCBFs)"
   },
   {,
-    "EventCode": "0x3689E",
-    "EventName": "PM_L2_RTY_LD",
-    "BriefDescription": "RC retries on PB for any load from core (excludes DCBFs)"
-  },
-  {,
     "EventCode": "0xE08C",
     "EventName": "PM_LSU0_ERAT_HIT",
     "BriefDescription": "Primary ERAT hit.  There is no secondary ERAT"
@@ -715,11 +710,6 @@
     "BriefDescription": "Lifetime, sample of RD machine 0 valid"
   },
   {,
-    "EventCode": "0x468B4",
-    "EventName": "PM_L3_RD0_BUSY",
-    "BriefDescription": "Lifetime, sample of RD machine 0 valid"
-  },
-  {,
     "EventCode": "0x46080",
     "EventName": "PM_L2_DISP_ALL_L2MISS",
     "BriefDescription": "All successful Ld/St dispatches for this thread that were an L2 miss (excludes i_l2mru_tch_reqs)"
@@ -850,21 +840,11 @@
     "BriefDescription": "RC mach 0 Busy. Used by PMU to sample ave RC lifetime (mach0 used as sample point)"
   },
   {,
-    "EventCode": "0x2608C",
-    "EventName": "PM_RC0_BUSY",
-    "BriefDescription": "RC mach 0 Busy. Used by PMU to sample ave RC lifetime (mach0 used as sample point)"
-  },
-  {,
     "EventCode": "0x36082",
     "EventName": "PM_L2_LD_DISP",
     "BriefDescription": "All successful I-or-D side load dispatches for this thread (excludes i_l2mru_tch_reqs)."
   },
   {,
-    "EventCode": "0x1609E",
-    "EventName": "PM_L2_LD_DISP",
-    "BriefDescription": "All successful D side load dispatches for this thread (L2 miss + L2 hits)"
-  },
-  {,
     "EventCode": "0xF8B0",
     "EventName": "PM_L3_SW_PREF",
     "BriefDescription": "L3 load prefetch, sourced from a software prefetch stream, was sent to the nest"
@@ -1040,11 +1020,6 @@
     "BriefDescription": "L3 castouts in Mepf state for this thread"
   },
   {,
-    "EventCode": "0x168A0",
-    "EventName": "PM_L3_CO_MEPF",
-    "BriefDescription": "L3 CO of line in Mep state (includes casthrough to memory).  The Mepf state indicates that a line was brought in to satisfy an L3 prefetch request"
-  },
-  {,
     "EventCode": "0x460A2",
     "EventName": "PM_L3_LAT_CI_HIT",
     "BriefDescription": "L3 Lateral Castins Hit"
@@ -1150,11 +1125,6 @@
     "BriefDescription": "RC retries on PB for any store from core (excludes DCBFs)"
   },
   {,
-    "EventCode": "0x4689E",
-    "EventName": "PM_L2_RTY_ST",
-    "BriefDescription": "RC retries on PB for any store from core (excludes DCBFs)"
-  },
-  {,
     "EventCode": "0x24040",
     "EventName": "PM_INST_FROM_L2_MEPF",
     "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state. due to an instruction fetch (not prefetch)"
@@ -1255,11 +1225,6 @@
     "BriefDescription": "CO mach 0 Busy. Used by PMU to sample ave CO lifetime (mach0 used as sample point)"
   },
   {,
-    "EventCode": "0x4608C",
-    "EventName": "PM_CO0_BUSY",
-    "BriefDescription": "CO mach 0 Busy. Used by PMU to sample ave CO lifetime (mach0 used as sample point)"
-  },
-  {,
     "EventCode": "0x2C122",
     "EventName": "PM_MRK_DATA_FROM_L3_DISP_CONFLICT_CYC",
     "BriefDescription": "Duration in cycles to reload from local core's L3 with dispatch conflict due to a marked load"
@@ -1395,11 +1360,6 @@
     "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a instruction side request"
   },
   {,
-    "EventCode": "0x40006",
-    "EventName": "PM_ISLB_MISS",
-    "BriefDescription": "Number of ISLB misses for this thread"
-  },
-  {,
     "EventCode": "0xD8A8",
     "EventName": "PM_ISLB_MISS",
     "BriefDescription": "Instruction SLB miss - Total of all segment sizes"
@@ -1515,11 +1475,6 @@
     "BriefDescription": "All successful I-side dispatches for this thread (excludes i_l2mru_tch reqs)."
   },
   {,
-    "EventCode": "0x3609E",
-    "EventName": "PM_L2_INST",
-    "BriefDescription": "All successful I-side dispatches that were an L2 miss for this thread (excludes i_l2mru_tch reqs)"
-  },
-  {,
     "EventCode": "0x3504C",
     "EventName": "PM_IPTEG_FROM_DL4",
     "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to a instruction side request"
@@ -1690,11 +1645,6 @@
     "BriefDescription": "All successful I-or-D side load dispatches for this thread that were L2 hits (excludes i_l2mru_tch_reqs)"
   },
   {,
-    "EventCode": "0x2609E",
-    "EventName": "PM_L2_LD_HIT",
-    "BriefDescription": "All successful D side load dispatches for this thread that were L2 hits for this thread"
-  },
-  {,
     "EventCode": "0x168AC",
     "EventName": "PM_L3_CI_USAGE",
     "BriefDescription": "Rotating sample of 16 CI or CO actives"
@@ -1795,21 +1745,11 @@
     "BriefDescription": "Rotating sample of 8 WI valid"
   },
   {,
-    "EventCode": "0x260B6",
-    "EventName": "PM_L3_WI0_BUSY",
-    "BriefDescription": "Rotating sample of 8 WI valid (duplicate)"
-  },
-  {,
     "EventCode": "0x368AC",
     "EventName": "PM_L3_CO0_BUSY",
     "BriefDescription": "Lifetime, sample of CO machine 0 valid"
   },
   {,
-    "EventCode": "0x468AC",
-    "EventName": "PM_L3_CO0_BUSY",
-    "BriefDescription": "Lifetime, sample of CO machine 0 valid"
-  },
-  {,
     "EventCode": "0x2E040",
     "EventName": "PM_DPTEG_FROM_L2_MEPF",
     "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state. due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
@@ -1840,11 +1780,6 @@
     "BriefDescription": "L3 PF received retry port 0, every retry counted"
   },
   {,
-    "EventCode": "0x260AE",
-    "EventName": "PM_L3_P0_PF_RTY",
-    "BriefDescription": "L3 PF received retry port 0, every retry counted"
-  },
-  {,
     "EventCode": "0x268B2",
     "EventName": "PM_L3_LOC_GUESS_WRONG",
     "BriefDescription": "Initial scope=node (LNS) but data from out side local node (near or far or rem). Prediction too Low"
@@ -1895,11 +1830,6 @@
     "BriefDescription": "Lifetime, sample of snooper machine 0 valid"
   },
   {,
-    "EventCode": "0x460AC",
-    "EventName": "PM_L3_SN0_BUSY",
-    "BriefDescription": "Lifetime, sample of snooper machine 0 valid"
-  },
-  {,
     "EventCode": "0x3005C",
     "EventName": "PM_BFU_BUSY",
     "BriefDescription": "Cycles in which all 4 Binary Floating Point units are busy. The BFU is running at capacity"
@@ -1935,11 +1865,6 @@
     "BriefDescription": "Lifetime, sample of PF machine 0 valid"
   },
   {,
-    "EventCode": "0x460B4",
-    "EventName": "PM_L3_PF0_BUSY",
-    "BriefDescription": "Lifetime, sample of PF machine 0 valid"
-  },
-  {,
     "EventCode": "0xC0B0",
     "EventName": "PM_LSU_FLUSH_UE",
     "BriefDescription": "Correctable ECC error on reload data, reported at critical data forward time"
@@ -2085,11 +2010,6 @@
     "BriefDescription": "L3 CO received retry port 1 (memory only), every retry counted"
   },
   {,
-    "EventCode": "0x468AE",
-    "EventName": "PM_L3_P1_CO_RTY",
-    "BriefDescription": "L3 CO received retry port 3 (memory only), every retry counted"
-  },
-  {,
     "EventCode": "0xC0AC",
     "EventName": "PM_LSU_FLUSH_EMSH",
     "BriefDescription": "An ERAT miss was detected after a set-p hit. Erat tracker indicates fail due to tlbmiss and the instruction gets flushed because the instruction was working on the wrong address"
@@ -2195,11 +2115,6 @@
     "BriefDescription": "SNP dispatched for a write and was M (true M); for DMA cacheinj this will pulse if rty/push is required (won't pulse if cacheinj is accepted)"
   },
   {,
-    "EventCode": "0x46886",
-    "EventName": "PM_L2_SN_M_WR_DONE",
-    "BriefDescription": "SNP dispatched for a write and was M (true M); for DMA cacheinj this will pulse if rty/push is required (won't pulse if cacheinj is accepted)"
-  },
-  {,
     "EventCode": "0x489C",
     "EventName": "PM_BR_CORECT_PRED_TAKEN_CMPL",
     "BriefDescription": "Conditional Branch Completed in which the HW correctly predicted the direction as taken.  Counted at completion time"
@@ -2290,21 +2205,11 @@
     "BriefDescription": "SN mach 0 Busy. Used by PMU to sample ave SN lifetime (mach0 used as sample point)"
   },
   {,
-    "EventCode": "0x26090",
-    "EventName": "PM_SN0_BUSY",
-    "BriefDescription": "SN mach 0 Busy. Used by PMU to sample ave SN lifetime (mach0 used as sample point)"
-  },
-  {,
     "EventCode": "0x360AE",
     "EventName": "PM_L3_P0_CO_RTY",
     "BriefDescription": "L3 CO received retry port 0 (memory only), every retry counted"
   },
   {,
-    "EventCode": "0x460AE",
-    "EventName": "PM_L3_P0_CO_RTY",
-    "BriefDescription": "L3 CO received retry port 0 (memory only), every retry counted"
-  },
-  {,
     "EventCode": "0x168A8",
     "EventName": "PM_L3_WI_USAGE",
     "BriefDescription": "Lifetime, sample of Write Inject machine 0 valid"
@@ -2340,26 +2245,11 @@
     "BriefDescription": "L3 PF received retry port 1, every retry counted"
   },
   {,
-    "EventCode": "0x268AE",
-    "EventName": "PM_L3_P1_PF_RTY",
-    "BriefDescription": "L3 PF received retry port 3, every retry counted"
-  },
-  {,
     "EventCode": "0x46082",
     "EventName": "PM_L2_ST_DISP",
     "BriefDescription": "All successful D-side store dispatches for this thread "
   },
   {,
-    "EventCode": "0x1689E",
-    "EventName": "PM_L2_ST_DISP",
-    "BriefDescription": "All successful D-side store dispatches for this thread (L2 miss + L2 hits)"
-  },
-  {,
-    "EventCode": "0x36880",
-    "EventName": "PM_L2_INST_MISS",
-    "BriefDescription": "All successful I-side dispatches that were an L2 miss for this thread (excludes i_l2mru_tch reqs)"
-  },
-  {,
     "EventCode": "0x4609E",
     "EventName": "PM_L2_INST_MISS",
     "BriefDescription": "All successful I-side dispatches that were an L2 miss for this thread (excludes i_l2mru_tch reqs)"
@@ -2430,11 +2320,6 @@
     "BriefDescription": "# PPC Dispatched"
   },
   {,
-    "EventCode": "0x300F2",
-    "EventName": "PM_INST_DISP",
-    "BriefDescription": "# PPC Dispatched"
-  },
-  {,
     "EventCode": "0x4E05E",
     "EventName": "PM_TM_OUTER_TBEGIN_DISP",
     "BriefDescription": "Number of outer tbegin instructions dispatched. The dispatch unit determines whether the tbegin instruction is outer or nested. This is a speculative count, which includes flushed instructions"
@@ -2460,11 +2345,6 @@
     "BriefDescription": "All successful D-side store dispatches for this thread that were L2 hits"
   },
   {,
-    "EventCode": "0x2689E",
-    "EventName": "PM_L2_ST_HIT",
-    "BriefDescription": "All successful D-side store dispatches that were L2 hits for this thread"
-  },
-  {,
     "EventCode": "0x360A8",
     "EventName": "PM_L3_CO",
     "BriefDescription": "L3 castout occurring (does not include casthrough or log writes (cinj/dmaw))"
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json b/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json
index 47a82568a8df..bc2db636dabf 100644
--- a/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json
+++ b/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json
@@ -420,11 +420,6 @@
     "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for an instruction fetch"
   },
   {,
-    "EventCode": "0x10016",
-    "EventName": "PM_DSLB_MISS",
-    "BriefDescription": "Data SLB Miss - Total of all segment sizes"
-  },
-  {,
     "EventCode": "0xD0A8",
     "EventName": "PM_DSLB_MISS",
     "BriefDescription": "Data SLB Miss - Total of all segment sizes"
@@ -554,4 +549,4 @@
     "EventName": "PM_MRK_DATA_FROM_L21_SHR_CYC",
     "BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's L2 on the same chip due to a marked load"
   }
-]
-\ No newline at end of file
+]
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/pmc.json b/tools/perf/pmu-events/arch/powerpc/power9/pmc.json
index a2c95a99e168..3ef8a10aac86 100644
--- a/tools/perf/pmu-events/arch/powerpc/power9/pmc.json
+++ b/tools/perf/pmu-events/arch/powerpc/power9/pmc.json
@@ -5,11 +5,6 @@
     "BriefDescription": "Branches that are not strongly biased"
   },
   {,
-    "EventCode": "0x40036",
-    "EventName": "PM_BR_2PATH",
-    "BriefDescription": "Branches that are not strongly biased"
-  },
-  {,
     "EventCode": "0x40056",
     "EventName": "PM_MEM_LOC_THRESH_LSU_HIGH",
     "BriefDescription": "Local memory above threshold for LSU medium"
@@ -124,4 +119,4 @@
     "EventName": "PM_1FLOP_CMPL",
     "BriefDescription": "one flop (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg) operation completed"
   }
-]
-\ No newline at end of file
+]
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 761c5a448c56..466a462b26d1 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -237,6 +237,11 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode,
 
 	thread__find_addr_map(thread, cpumode, MAP__FUNCTION, addr, &al);
 	if (!al.map || !al.map->dso) {
+		if (cpumode == PERF_RECORD_MISC_HYPERVISOR) {
+			pr_debug("Hypervisor address can not be resolved - skipping\n");
+			return 0;
+		}
+
 		pr_debug("thread__find_addr_map failed\n");
 		return -1;
 	}
diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
index 2a7b9b47bbcb..9ba1d216a89f 100644
--- a/tools/perf/tests/dwarf-unwind.c
+++ b/tools/perf/tests/dwarf-unwind.c
@@ -6,7 +6,7 @@
 #include "debug.h"
 #include "machine.h"
 #include "event.h"
-#include "unwind.h"
+#include "../util/unwind.h"
 #include "perf_regs.h"
 #include "map.h"
 #include "thread.h"
diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c
index 6d028f42b3cf..c3858487159d 100644
--- a/tools/perf/tests/sample-parsing.c
+++ b/tools/perf/tests/sample-parsing.c
@@ -141,6 +141,9 @@ static bool samples_same(const struct perf_sample *s1,
 		}
 	}
 
+	if (type & PERF_SAMPLE_PHYS_ADDR)
+		COMP(phys_addr);
+
 	return true;
 }
 
@@ -206,6 +209,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
 			.mask	= sample_regs,
 			.regs	= regs,
 		},
+		.phys_addr	= 113,
 	};
 	struct sample_read_value values[] = {{1, 5}, {9, 3}, {2, 7}, {6, 4},};
 	struct perf_sample sample_out;
@@ -305,7 +309,7 @@ int test__sample_parsing(struct test *test __maybe_unused, int subtest __maybe_u
 	 * were added.  Please actually update the test rather than just change
 	 * the condition below.
 	 */
-	if (PERF_SAMPLE_MAX > PERF_SAMPLE_REGS_INTR << 1) {
+	if (PERF_SAMPLE_MAX > PERF_SAMPLE_PHYS_ADDR << 1) {
 		pr_debug("sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating\n");
 		return -1;
 	}
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index ba0aee576a2b..786fecaf578e 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -829,7 +829,8 @@ static int annotate_browser__run(struct annotate_browser *browser,
 		"q/ESC/CTRL+C  Exit\n\n"
 		"ENTER         Go to target\n"
 		"ESC           Exit\n"
-		"H             Cycle thru hottest instructions\n"
+		"H             Go to hottest instruction\n"
+		"TAB/shift+TAB Cycle thru hottest instructions\n"
 		"j             Toggle showing jump to target arrows\n"
 		"J             Toggle showing number of jump sources on targets\n"
 		"n             Search next string\n"
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index f4bc2462bc2c..13dfb0a0bdeb 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -931,12 +931,8 @@ static int hist_browser__show_callchain_list(struct hist_browser *browser,
 				       browser->show_dso);
 
 	if (symbol_conf.show_branchflag_count) {
-		if (need_percent)
-			callchain_list_counts__printf_value(node, chain, NULL,
-							    buf, sizeof(buf));
-		else
-			callchain_list_counts__printf_value(NULL, chain, NULL,
-							    buf, sizeof(buf));
+		callchain_list_counts__printf_value(chain, NULL,
+						    buf, sizeof(buf));
 
 		if (asprintf(&alloc_str2, "%s%s", str, buf) < 0)
 			str = "Not enough memory!";
diff --git a/tools/perf/ui/progress.c b/tools/perf/ui/progress.c
index a0f24c7115c5..ae91c8148edf 100644
--- a/tools/perf/ui/progress.c
+++ b/tools/perf/ui/progress.c
@@ -1,3 +1,4 @@
+#include <linux/kernel.h>
 #include "../cache.h"
 #include "progress.h"
 
@@ -14,10 +15,14 @@ struct ui_progress_ops *ui_progress__ops = &null_progress__ops;
 
 void ui_progress__update(struct ui_progress *p, u64 adv)
 {
+	u64 last = p->curr;
+
 	p->curr += adv;
 
 	if (p->curr >= p->next) {
-		p->next += p->step;
+		u64 nr = DIV_ROUND_UP(p->curr - last, p->step);
+
+		p->next += nr * p->step;
 		ui_progress__ops->update(p);
 	}
 }
@@ -25,7 +30,7 @@ void ui_progress__update(struct ui_progress *p, u64 adv)
 void ui_progress__init(struct ui_progress *p, u64 total, const char *title)
 {
 	p->curr = 0;
-	p->next = p->step = total / 16;
+	p->next = p->step = total / 16 ?: 1;
 	p->total = total;
 	p->title = title;
 
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index 5c95b8301c67..8bdb7a500181 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -124,12 +124,8 @@ static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_node *node,
 	str = callchain_list__sym_name(chain, bf, sizeof(bf), false);
 
 	if (symbol_conf.show_branchflag_count) {
-		if (!period)
-			callchain_list_counts__printf_value(node, chain, NULL,
-							    buf, sizeof(buf));
-		else
-			callchain_list_counts__printf_value(NULL, chain, NULL,
-							    buf, sizeof(buf));
+		callchain_list_counts__printf_value(chain, NULL,
+						    buf, sizeof(buf));
 
 		if (asprintf(&alloc_str, "%s%s", str, buf) < 0)
 			str = "Not enough memory!";
@@ -313,7 +309,7 @@ static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root,
 
 			if (symbol_conf.show_branchflag_count)
 				ret += callchain_list_counts__printf_value(
-						NULL, chain, fp, NULL, 0);
+						chain, fp, NULL, 0);
 			ret += fprintf(fp, "\n");
 
 			if (++entries_printed == callchain_param.print_limit)
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index f320b0777e0d..be09d77cade0 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -65,8 +65,6 @@ static int parse_callchain_mode(const char *value)
 		callchain_param.mode = CHAIN_FOLDED;
 		return 0;
 	}
-
-	pr_err("Invalid callchain mode: %s\n", value);
 	return -1;
 }
 
@@ -82,8 +80,6 @@ static int parse_callchain_order(const char *value)
 		callchain_param.order_set = true;
 		return 0;
 	}
-
-	pr_err("Invalid callchain order: %s\n", value);
 	return -1;
 }
 
@@ -105,8 +101,6 @@ static int parse_callchain_sort_key(const char *value)
 		callchain_param.branch_callstack = 1;
 		return 0;
 	}
-
-	pr_err("Invalid callchain sort key: %s\n", value);
 	return -1;
 }
 
@@ -124,8 +118,6 @@ static int parse_callchain_value(const char *value)
 		callchain_param.value = CCVAL_COUNT;
 		return 0;
 	}
-
-	pr_err("Invalid callchain config key: %s\n", value);
 	return -1;
 }
 
@@ -319,12 +311,27 @@ int perf_callchain_config(const char *var, const char *value)
 
 		return ret;
 	}
-	if (!strcmp(var, "print-type"))
-		return parse_callchain_mode(value);
-	if (!strcmp(var, "order"))
-		return parse_callchain_order(value);
-	if (!strcmp(var, "sort-key"))
-		return parse_callchain_sort_key(value);
+	if (!strcmp(var, "print-type")){
+		int ret;
+		ret = parse_callchain_mode(value);
+		if (ret == -1)
+			pr_err("Invalid callchain mode: %s\n", value);
+		return ret;
+	}
+	if (!strcmp(var, "order")){
+		int ret;
+		ret = parse_callchain_order(value);
+		if (ret == -1)
+			pr_err("Invalid callchain order: %s\n", value);
+		return ret;
+	}
+	if (!strcmp(var, "sort-key")){
+		int ret;
+		ret = parse_callchain_sort_key(value);
+		if (ret == -1)
+			pr_err("Invalid callchain sort key: %s\n", value);
+		return ret;
+	}
 	if (!strcmp(var, "threshold")) {
 		callchain_param.min_percent = strtod(value, &endptr);
 		if (value == endptr) {
@@ -588,7 +595,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
 				call->cycles_count =
 					cursor_node->branch_flags.cycles;
 				call->iter_count = cursor_node->nr_loop_iter;
-				call->samples_count = cursor_node->samples;
+				call->iter_cycles = cursor_node->iter_cycles;
 			}
 		}
 
@@ -722,7 +729,7 @@ static enum match_result match_chain(struct callchain_cursor_node *node,
 				cnode->cycles_count +=
 					node->branch_flags.cycles;
 				cnode->iter_count += node->nr_loop_iter;
-				cnode->samples_count += node->samples;
+				cnode->iter_cycles += node->iter_cycles;
 			}
 		}
 
@@ -998,7 +1005,7 @@ int callchain_merge(struct callchain_cursor *cursor,
 int callchain_cursor_append(struct callchain_cursor *cursor,
 			    u64 ip, struct map *map, struct symbol *sym,
 			    bool branch, struct branch_flags *flags,
-			    int nr_loop_iter, int samples, u64 branch_from)
+			    int nr_loop_iter, u64 iter_cycles, u64 branch_from)
 {
 	struct callchain_cursor_node *node = *cursor->last;
 
@@ -1016,7 +1023,7 @@ int callchain_cursor_append(struct callchain_cursor *cursor,
 	node->sym = sym;
 	node->branch = branch;
 	node->nr_loop_iter = nr_loop_iter;
-	node->samples = samples;
+	node->iter_cycles = iter_cycles;
 
 	if (flags)
 		memcpy(&node->branch_flags, flags,
@@ -1306,7 +1313,7 @@ static int branch_to_str(char *bf, int bfsize,
 static int branch_from_str(char *bf, int bfsize,
 			   u64 branch_count,
 			   u64 cycles_count, u64 iter_count,
-			   u64 samples_count)
+			   u64 iter_cycles)
 {
 	int printed = 0, i = 0;
 	u64 cycles;
@@ -1318,9 +1325,13 @@ static int branch_from_str(char *bf, int bfsize,
 				bf + printed, bfsize - printed);
 	}
 
-	if (iter_count && samples_count) {
-		printed += count_pri64_printf(i++, "iterations",
-				iter_count / samples_count,
+	if (iter_count) {
+		printed += count_pri64_printf(i++, "iter",
+				iter_count,
+				bf + printed, bfsize - printed);
+
+		printed += count_pri64_printf(i++, "avg_cycles",
+				iter_cycles / iter_count,
 				bf + printed, bfsize - printed);
 	}
 
@@ -1333,7 +1344,7 @@ static int branch_from_str(char *bf, int bfsize,
 static int counts_str_build(char *bf, int bfsize,
 			     u64 branch_count, u64 predicted_count,
 			     u64 abort_count, u64 cycles_count,
-			     u64 iter_count, u64 samples_count,
+			     u64 iter_count, u64 iter_cycles,
 			     struct branch_type_stat *brtype_stat)
 {
 	int printed;
@@ -1346,7 +1357,7 @@ static int counts_str_build(char *bf, int bfsize,
 				predicted_count, abort_count, brtype_stat);
 	} else {
 		printed = branch_from_str(bf, bfsize, branch_count,
-				cycles_count, iter_count, samples_count);
+				cycles_count, iter_count, iter_cycles);
 	}
 
 	if (!printed)
@@ -1358,14 +1369,14 @@ static int counts_str_build(char *bf, int bfsize,
 static int callchain_counts_printf(FILE *fp, char *bf, int bfsize,
 				   u64 branch_count, u64 predicted_count,
 				   u64 abort_count, u64 cycles_count,
-				   u64 iter_count, u64 samples_count,
+				   u64 iter_count, u64 iter_cycles,
 				   struct branch_type_stat *brtype_stat)
 {
 	char str[256];
 
 	counts_str_build(str, sizeof(str), branch_count,
 			 predicted_count, abort_count, cycles_count,
-			 iter_count, samples_count, brtype_stat);
+			 iter_count, iter_cycles, brtype_stat);
 
 	if (fp)
 		return fprintf(fp, "%s", str);
@@ -1373,31 +1384,23 @@ static int callchain_counts_printf(FILE *fp, char *bf, int bfsize,
 	return scnprintf(bf, bfsize, "%s", str);
 }
 
-int callchain_list_counts__printf_value(struct callchain_node *node,
-					struct callchain_list *clist,
+int callchain_list_counts__printf_value(struct callchain_list *clist,
 					FILE *fp, char *bf, int bfsize)
 {
 	u64 branch_count, predicted_count;
 	u64 abort_count, cycles_count;
-	u64 iter_count = 0, samples_count = 0;
+	u64 iter_count, iter_cycles;
 
 	branch_count = clist->branch_count;
 	predicted_count = clist->predicted_count;
 	abort_count = clist->abort_count;
 	cycles_count = clist->cycles_count;
-
-	if (node) {
-		struct callchain_list *call;
-
-		list_for_each_entry(call, &node->val, list) {
-			iter_count += call->iter_count;
-			samples_count += call->samples_count;
-		}
-	}
+	iter_count = clist->iter_count;
+	iter_cycles = clist->iter_cycles;
 
 	return callchain_counts_printf(fp, bf, bfsize, branch_count,
 				       predicted_count, abort_count,
-				       cycles_count, iter_count, samples_count,
+				       cycles_count, iter_count, iter_cycles,
 				       &clist->brtype_stat);
 }
 
@@ -1523,7 +1526,8 @@ int callchain_cursor__copy(struct callchain_cursor *dst,
 
 		rc = callchain_cursor_append(dst, node->ip, node->map, node->sym,
 					     node->branch, &node->branch_flags,
-					     node->nr_loop_iter, node->samples,
+					     node->nr_loop_iter,
+					     node->iter_cycles,
 					     node->branch_from);
 		if (rc)
 			break;
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 97738201464a..1ed6fc61d0a5 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -119,7 +119,7 @@ struct callchain_list {
 	u64			abort_count;
 	u64			cycles_count;
 	u64			iter_count;
-	u64			samples_count;
+	u64			iter_cycles;
 	struct branch_type_stat brtype_stat;
 	char		       *srcline;
 	struct list_head	list;
@@ -139,7 +139,7 @@ struct callchain_cursor_node {
 	struct branch_flags		branch_flags;
 	u64				branch_from;
 	int				nr_loop_iter;
-	int				samples;
+	u64				iter_cycles;
 	struct callchain_cursor_node	*next;
 };
 
@@ -201,7 +201,7 @@ static inline void callchain_cursor_reset(struct callchain_cursor *cursor)
 int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip,
 			    struct map *map, struct symbol *sym,
 			    bool branch, struct branch_flags *flags,
-			    int nr_loop_iter, int samples, u64 branch_from);
+			    int nr_loop_iter, u64 iter_cycles, u64 branch_from);
 
 /* Close a cursor writing session. Initialize for the reader */
 static inline void callchain_cursor_commit(struct callchain_cursor *cursor)
@@ -282,8 +282,7 @@ char *callchain_node__scnprintf_value(struct callchain_node *node,
 int callchain_node__fprintf_value(struct callchain_node *node,
 				  FILE *fp, u64 total);
 
-int callchain_list_counts__printf_value(struct callchain_node *node,
-					struct callchain_list *clist,
+int callchain_list_counts__printf_value(struct callchain_list *clist,
 					FILE *fp, char *bf, int bfsize);
 
 void free_callchain(struct callchain_root *root);
diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c
index e84bbc8ec058..263f5a906ba5 100644
--- a/tools/perf/util/data.c
+++ b/tools/perf/util/data.c
@@ -10,6 +10,16 @@
 #include "util.h"
 #include "debug.h"
 
+#ifndef O_CLOEXEC
+#ifdef __sparc__
+#define O_CLOEXEC	0x400000
+#elif defined(__alpha__) || defined(__hppa__)
+#define O_CLOEXEC	010000000
+#else
+#define O_CLOEXEC	02000000
+#endif
+#endif
+
 static bool check_pipe(struct perf_data_file *file)
 {
 	struct stat st;
@@ -96,7 +106,8 @@ static int open_file_write(struct perf_data_file *file)
 	if (check_backup(file))
 		return -1;
 
-	fd = open(file->path, O_CREAT|O_RDWR|O_TRUNC, S_IRUSR|S_IWUSR);
+	fd = open(file->path, O_CREAT|O_RDWR|O_TRUNC|O_CLOEXEC,
+		  S_IRUSR|S_IWUSR);
 
 	if (fd < 0)
 		pr_err("failed to open %s : %s\n", file->path,
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 423ac82605f3..ee7bcc898d35 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -200,6 +200,7 @@ struct perf_sample {
 	u32 cpu;
 	u32 raw_size;
 	u64 data_src;
+	u64 phys_addr;
 	u32 flags;
 	u16 insn_len;
 	u8  cpumode;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index d9bd632ed7db..0dccdb89572c 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -271,12 +271,17 @@ struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
 	return evsel;
 }
 
+static bool perf_event_can_profile_kernel(void)
+{
+	return geteuid() == 0 || perf_event_paranoid() == -1;
+}
+
 struct perf_evsel *perf_evsel__new_cycles(bool precise)
 {
 	struct perf_event_attr attr = {
 		.type	= PERF_TYPE_HARDWARE,
 		.config	= PERF_COUNT_HW_CPU_CYCLES,
-		.exclude_kernel	= geteuid() != 0,
+		.exclude_kernel	= !perf_event_can_profile_kernel(),
 	};
 	struct perf_evsel *evsel;
 
@@ -955,6 +960,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
 	if (opts->sample_address)
 		perf_evsel__set_sample_bit(evsel, DATA_SRC);
 
+	if (opts->sample_phys_addr)
+		perf_evsel__set_sample_bit(evsel, PHYS_ADDR);
+
 	if (opts->no_buffering) {
 		attr->watermark = 0;
 		attr->wakeup_events = 1;
@@ -1464,7 +1472,7 @@ static void __p_sample_type(char *buf, size_t size, u64 value)
 		bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW),
 		bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER),
 		bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC),
-		bit_name(WEIGHT),
+		bit_name(WEIGHT), bit_name(PHYS_ADDR),
 		{ .name = NULL, }
 	};
 #undef bit_name
@@ -2206,6 +2214,12 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
 		}
 	}
 
+	data->phys_addr = 0;
+	if (type & PERF_SAMPLE_PHYS_ADDR) {
+		data->phys_addr = *array;
+		array++;
+	}
+
 	return 0;
 }
 
@@ -2311,6 +2325,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
 		}
 	}
 
+	if (type & PERF_SAMPLE_PHYS_ADDR)
+		result += sizeof(u64);
+
 	return result;
 }
 
@@ -2500,6 +2517,11 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type,
 		}
 	}
 
+	if (type & PERF_SAMPLE_PHYS_ADDR) {
+		*array = sample->phys_addr;
+		array++;
+	}
+
 	return 0;
 }
 
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 351d3b2d8887..dd2c4b5112a5 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -131,6 +131,7 @@ struct perf_evsel {
 	bool			cmdline_group_boundary;
 	struct list_head	config_terms;
 	int			bpf_fd;
+	bool			auto_merge_stats;
 	bool			merged_stat;
 	const char *		metric_expr;
 	const char *		metric_name;
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 9453b2e27015..e60d8d8ea4c2 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -167,6 +167,10 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
 			symlen = unresolved_col_width + 4 + 2;
 			hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO);
 		}
+
+		hists__new_col_len(hists, HISTC_MEM_PHYS_DADDR,
+				   unresolved_col_width + 4 + 2);
+
 	} else {
 		symlen = unresolved_col_width + 4 + 2;
 		hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, symlen);
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index ee3670a388df..e60dda26a920 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -47,6 +47,7 @@ enum hist_column {
 	HISTC_GLOBAL_WEIGHT,
 	HISTC_MEM_DADDR_SYMBOL,
 	HISTC_MEM_DADDR_DSO,
+	HISTC_MEM_PHYS_DADDR,
 	HISTC_MEM_LOCKED,
 	HISTC_MEM_TLB,
 	HISTC_MEM_LVL,
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 5c8eacaca4f4..df709363ef69 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1635,10 +1635,12 @@ static void ip__resolve_ams(struct thread *thread,
 	ams->al_addr = al.addr;
 	ams->sym = al.sym;
 	ams->map = al.map;
+	ams->phys_addr = 0;
 }
 
 static void ip__resolve_data(struct thread *thread,
-			     u8 m, struct addr_map_symbol *ams, u64 addr)
+			     u8 m, struct addr_map_symbol *ams,
+			     u64 addr, u64 phys_addr)
 {
 	struct addr_location al;
 
@@ -1658,6 +1660,7 @@ static void ip__resolve_data(struct thread *thread,
 	ams->al_addr = al.addr;
 	ams->sym = al.sym;
 	ams->map = al.map;
+	ams->phys_addr = phys_addr;
 }
 
 struct mem_info *sample__resolve_mem(struct perf_sample *sample,
@@ -1669,12 +1672,18 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample,
 		return NULL;
 
 	ip__resolve_ams(al->thread, &mi->iaddr, sample->ip);
-	ip__resolve_data(al->thread, al->cpumode, &mi->daddr, sample->addr);
+	ip__resolve_data(al->thread, al->cpumode, &mi->daddr,
+			 sample->addr, sample->phys_addr);
 	mi->data_src.val = sample->data_src;
 
 	return mi;
 }
 
+struct iterations {
+	int nr_loop_iter;
+	u64 cycles;
+};
+
 static int add_callchain_ip(struct thread *thread,
 			    struct callchain_cursor *cursor,
 			    struct symbol **parent,
@@ -1683,11 +1692,12 @@ static int add_callchain_ip(struct thread *thread,
 			    u64 ip,
 			    bool branch,
 			    struct branch_flags *flags,
-			    int nr_loop_iter,
-			    int samples,
+			    struct iterations *iter,
 			    u64 branch_from)
 {
 	struct addr_location al;
+	int nr_loop_iter = 0;
+	u64 iter_cycles = 0;
 
 	al.filtered = 0;
 	al.sym = NULL;
@@ -1737,9 +1747,15 @@ static int add_callchain_ip(struct thread *thread,
 
 	if (symbol_conf.hide_unresolved && al.sym == NULL)
 		return 0;
+
+	if (iter) {
+		nr_loop_iter = iter->nr_loop_iter;
+		iter_cycles = iter->cycles;
+	}
+
 	return callchain_cursor_append(cursor, al.addr, al.map, al.sym,
-				       branch, flags, nr_loop_iter, samples,
-				       branch_from);
+				       branch, flags, nr_loop_iter,
+				       iter_cycles, branch_from);
 }
 
 struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
@@ -1760,6 +1776,18 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
 	return bi;
 }
 
+static void save_iterations(struct iterations *iter,
+			    struct branch_entry *be, int nr)
+{
+	int i;
+
+	iter->nr_loop_iter = nr;
+	iter->cycles = 0;
+
+	for (i = 0; i < nr; i++)
+		iter->cycles += be[i].flags.cycles;
+}
+
 #define CHASHSZ 127
 #define CHASHBITS 7
 #define NO_ENTRY 0xff
@@ -1767,7 +1795,8 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
 #define PERF_MAX_BRANCH_DEPTH 127
 
 /* Remove loops. */
-static int remove_loops(struct branch_entry *l, int nr)
+static int remove_loops(struct branch_entry *l, int nr,
+			struct iterations *iter)
 {
 	int i, j, off;
 	unsigned char chash[CHASHSZ];
@@ -1792,8 +1821,18 @@ static int remove_loops(struct branch_entry *l, int nr)
 					break;
 				}
 			if (is_loop) {
-				memmove(l + i, l + i + off,
-					(nr - (i + off)) * sizeof(*l));
+				j = nr - (i + off);
+				if (j > 0) {
+					save_iterations(iter + i + off,
+						l + i, off);
+
+					memmove(iter + i, iter + i + off,
+						j * sizeof(*iter));
+
+					memmove(l + i, l + i + off,
+						j * sizeof(*l));
+				}
+
 				nr -= off;
 			}
 		}
@@ -1883,7 +1922,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
 
 			err = add_callchain_ip(thread, cursor, parent,
 					       root_al, &cpumode, ip,
-					       branch, flags, 0, 0,
+					       branch, flags, NULL,
 					       branch_from);
 			if (err)
 				return (err < 0) ? err : 0;
@@ -1909,7 +1948,6 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 	int i, j, err, nr_entries;
 	int skip_idx = -1;
 	int first_call = 0;
-	int nr_loop_iter;
 
 	if (chain)
 		chain_nr = chain->nr;
@@ -1942,6 +1980,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 	if (branch && callchain_param.branch_callstack) {
 		int nr = min(max_stack, (int)branch->nr);
 		struct branch_entry be[nr];
+		struct iterations iter[nr];
 
 		if (branch->nr > PERF_MAX_BRANCH_DEPTH) {
 			pr_warning("corrupted branch chain. skipping...\n");
@@ -1972,38 +2011,21 @@ static int thread__resolve_callchain_sample(struct thread *thread,
 				be[i] = branch->entries[branch->nr - i - 1];
 		}
 
-		nr_loop_iter = nr;
-		nr = remove_loops(be, nr);
-
-		/*
-		 * Get the number of iterations.
-		 * It's only approximation, but good enough in practice.
-		 */
-		if (nr_loop_iter > nr)
-			nr_loop_iter = nr_loop_iter - nr + 1;
-		else
-			nr_loop_iter = 0;
+		memset(iter, 0, sizeof(struct iterations) * nr);
+		nr = remove_loops(be, nr, iter);
 
 		for (i = 0; i < nr; i++) {
-			if (i == nr - 1)
-				err = add_callchain_ip(thread, cursor, parent,
-						       root_al,
-						       NULL, be[i].to,
-						       true, &be[i].flags,
-						       nr_loop_iter, 1,
-						       be[i].from);
-			else
-				err = add_callchain_ip(thread, cursor, parent,
-						       root_al,
-						       NULL, be[i].to,
-						       true, &be[i].flags,
-						       0, 0, be[i].from);
+			err = add_callchain_ip(thread, cursor, parent,
+					       root_al,
+					       NULL, be[i].to,
+					       true, &be[i].flags,
+					       NULL, be[i].from);
 
 			if (!err)
 				err = add_callchain_ip(thread, cursor, parent, root_al,
 						       NULL, be[i].from,
 						       true, &be[i].flags,
-						       0, 0, 0);
+						       &iter[i], 0);
 			if (err == -EINVAL)
 				break;
 			if (err)
@@ -2037,7 +2059,7 @@ check_calls:
 
 		err = add_callchain_ip(thread, cursor, parent,
 				       root_al, &cpumode, ip,
-				       false, NULL, 0, 0, 0);
+				       false, NULL, NULL, 0);
 
 		if (err)
 			return (err < 0) ? err : 0;
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index f44aeba51d1f..f6257fb4f08c 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -310,7 +310,7 @@ static struct perf_evsel *
 __add_event(struct list_head *list, int *idx,
 	    struct perf_event_attr *attr,
 	    char *name, struct cpu_map *cpus,
-	    struct list_head *config_terms)
+	    struct list_head *config_terms, bool auto_merge_stats)
 {
 	struct perf_evsel *evsel;
 
@@ -324,6 +324,7 @@ __add_event(struct list_head *list, int *idx,
 	evsel->cpus        = cpu_map__get(cpus);
 	evsel->own_cpus    = cpu_map__get(cpus);
 	evsel->system_wide = !!cpus;
+	evsel->auto_merge_stats = auto_merge_stats;
 
 	if (name)
 		evsel->name = strdup(name);
@@ -339,7 +340,7 @@ static int add_event(struct list_head *list, int *idx,
 		     struct perf_event_attr *attr, char *name,
 		     struct list_head *config_terms)
 {
-	return __add_event(list, idx, attr, name, NULL, config_terms) ? 0 : -ENOMEM;
+	return __add_event(list, idx, attr, name, NULL, config_terms, false) ? 0 : -ENOMEM;
 }
 
 static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size)
@@ -1209,9 +1210,9 @@ int parse_events_add_numeric(struct parse_events_state *parse_state,
 			 get_config_name(head_config), &config_terms);
 }
 
-int parse_events_add_pmu(struct parse_events_state *parse_state,
+static int __parse_events_add_pmu(struct parse_events_state *parse_state,
 			 struct list_head *list, char *name,
-			 struct list_head *head_config)
+			 struct list_head *head_config, bool auto_merge_stats)
 {
 	struct perf_event_attr attr;
 	struct perf_pmu_info info;
@@ -1232,7 +1233,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
 
 	if (!head_config) {
 		attr.type = pmu->type;
-		evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu->cpus, NULL);
+		evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu->cpus, NULL, auto_merge_stats);
 		return evsel ? 0 : -ENOMEM;
 	}
 
@@ -1254,7 +1255,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
 
 	evsel = __add_event(list, &parse_state->idx, &attr,
 			    get_config_name(head_config), pmu->cpus,
-			    &config_terms);
+			    &config_terms, auto_merge_stats);
 	if (evsel) {
 		evsel->unit = info.unit;
 		evsel->scale = info.scale;
@@ -1267,6 +1268,13 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
 	return evsel ? 0 : -ENOMEM;
 }
 
+int parse_events_add_pmu(struct parse_events_state *parse_state,
+			 struct list_head *list, char *name,
+			 struct list_head *head_config)
+{
+	return __parse_events_add_pmu(parse_state, list, name, head_config, false);
+}
+
 int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
 			       char *str, struct list_head **listp)
 {
@@ -1296,8 +1304,8 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
 					return -1;
 				list_add_tail(&term->list, head);
 
-				if (!parse_events_add_pmu(parse_state, list,
-						  pmu->name, head)) {
+				if (!__parse_events_add_pmu(parse_state, list,
+							    pmu->name, head, true)) {
 					pr_debug("%s -> %s/%s/\n", str,
 						 pmu->name, alias->str);
 					ok++;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index ac863691605f..a7ebd9fe8e40 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1120,6 +1120,9 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event,
 	if (sample_type & PERF_SAMPLE_DATA_SRC)
 		printf(" . data_src: 0x%"PRIx64"\n", sample->data_src);
 
+	if (sample_type & PERF_SAMPLE_PHYS_ADDR)
+		printf(" .. phys_addr: 0x%"PRIx64"\n", sample->phys_addr);
+
 	if (sample_type & PERF_SAMPLE_TRANSACTION)
 		printf("... transaction: %" PRIx64 "\n", sample->transaction);
 
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 12359bd986db..eb3ab902a1c0 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -1316,6 +1316,47 @@ struct sort_entry sort_mem_dcacheline = {
 };
 
 static int64_t
+sort__phys_daddr_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	uint64_t l = 0, r = 0;
+
+	if (left->mem_info)
+		l = left->mem_info->daddr.phys_addr;
+	if (right->mem_info)
+		r = right->mem_info->daddr.phys_addr;
+
+	return (int64_t)(r - l);
+}
+
+static int hist_entry__phys_daddr_snprintf(struct hist_entry *he, char *bf,
+					   size_t size, unsigned int width)
+{
+	uint64_t addr = 0;
+	size_t ret = 0;
+	size_t len = BITS_PER_LONG / 4;
+
+	addr = he->mem_info->daddr.phys_addr;
+
+	ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", he->level);
+
+	ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx", len, addr);
+
+	ret += repsep_snprintf(bf + ret, size - ret, "%-*s", width - ret, "");
+
+	if (ret > width)
+		bf[width] = '\0';
+
+	return width;
+}
+
+struct sort_entry sort_mem_phys_daddr = {
+	.se_header	= "Data Physical Address",
+	.se_cmp		= sort__phys_daddr_cmp,
+	.se_snprintf	= hist_entry__phys_daddr_snprintf,
+	.se_width_idx	= HISTC_MEM_PHYS_DADDR,
+};
+
+static int64_t
 sort__abort_cmp(struct hist_entry *left, struct hist_entry *right)
 {
 	if (!left->branch_info || !right->branch_info)
@@ -1547,6 +1588,7 @@ static struct sort_dimension memory_sort_dimensions[] = {
 	DIM(SORT_MEM_LVL, "mem", sort_mem_lvl),
 	DIM(SORT_MEM_SNOOP, "snoop", sort_mem_snoop),
 	DIM(SORT_MEM_DCACHELINE, "dcacheline", sort_mem_dcacheline),
+	DIM(SORT_MEM_PHYS_DADDR, "phys_daddr", sort_mem_phys_daddr),
 };
 
 #undef DIM
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index b7c75597e18f..f36dc4980a6c 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -245,6 +245,7 @@ enum sort_type {
 	SORT_MEM_SNOOP,
 	SORT_MEM_DCACHELINE,
 	SORT_MEM_IADDR_SYMBOL,
+	SORT_MEM_PHYS_DADDR,
 };
 
 /*
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 5c39f420111e..9cf781f0d8a2 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -810,12 +810,6 @@ static u64 ref_reloc(struct kmap *kmap)
 void __weak arch__sym_update(struct symbol *s __maybe_unused,
 		GElf_Sym *sym __maybe_unused) { }
 
-void __weak arch__adjust_sym_map_offset(GElf_Sym *sym, GElf_Shdr *shdr,
-				       struct map *map __maybe_unused)
-{
-	sym->st_value -= shdr->sh_addr - shdr->sh_offset;
-}
-
 int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
 		  struct symsrc *runtime_ss, int kmodule)
 {
@@ -996,7 +990,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
 
 			/* Adjust symbol to map to file offset */
 			if (adjust_kernel_syms)
-				arch__adjust_sym_map_offset(&sym, &shdr, map);
+				sym.st_value -= shdr.sh_addr - shdr.sh_offset;
 
 			if (strcmp(section_name,
 				   (curr_dso->short_name +
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index d00a012cfdfb..aad99e7e179b 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -186,6 +186,7 @@ struct addr_map_symbol {
 	struct symbol *sym;
 	u64	      addr;
 	u64	      al_addr;
+	u64	      phys_addr;
 };
 
 struct branch_info {
@@ -343,9 +344,6 @@ int setup_intlist(struct intlist **list, const char *list_str,
 #ifdef HAVE_LIBELF_SUPPORT
 bool elf__needs_adjust_symbols(GElf_Ehdr ehdr);
 void arch__sym_update(struct symbol *s, GElf_Sym *sym);
-void arch__adjust_sym_map_offset(GElf_Sym *sym,
-				 GElf_Shdr *shdr __maybe_unused,
-				 struct map *map __maybe_unused);
 #endif
 
 #define SYMBOL_A 0
diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c
index bbb4c1957578..6eea7cff3d4e 100644
--- a/tools/perf/util/syscalltbl.c
+++ b/tools/perf/util/syscalltbl.c
@@ -15,10 +15,11 @@
 
 #include "syscalltbl.h"
 #include <stdlib.h>
+#include <linux/compiler.h>
 
 #ifdef HAVE_SYSCALL_TABLE
-#include <linux/compiler.h>
 #include <string.h>
+#include "string2.h"
 #include "util.h"
 
 #if defined(__x86_64__)
@@ -105,6 +106,27 @@ int syscalltbl__id(struct syscalltbl *tbl, const char *name)
 	return sc ? sc->id : -1;
 }
 
+int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
+{
+	int i;
+	struct syscall *syscalls = tbl->syscalls.entries;
+
+	for (i = *idx + 1; i < tbl->syscalls.nr_entries; ++i) {
+		if (strglobmatch(syscalls[i].name, syscall_glob)) {
+			*idx = i;
+			return syscalls[i].id;
+		}
+	}
+
+	return -1;
+}
+
+int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
+{
+	*idx = -1;
+	return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx);
+}
+
 #else /* HAVE_SYSCALL_TABLE */
 
 #include <libaudit.h>
@@ -131,4 +153,15 @@ int syscalltbl__id(struct syscalltbl *tbl, const char *name)
 {
 	return audit_name_to_syscall(name, tbl->audit_machine);
 }
+
+int syscalltbl__strglobmatch_next(struct syscalltbl *tbl __maybe_unused,
+				  const char *syscall_glob __maybe_unused, int *idx __maybe_unused)
+{
+	return -1;
+}
+
+int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
+{
+	return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx);
+}
 #endif /* HAVE_SYSCALL_TABLE */
diff --git a/tools/perf/util/syscalltbl.h b/tools/perf/util/syscalltbl.h
index e2951510484f..e9fb8786da7c 100644
--- a/tools/perf/util/syscalltbl.h
+++ b/tools/perf/util/syscalltbl.h
@@ -17,4 +17,7 @@ void syscalltbl__delete(struct syscalltbl *tbl);
 const char *syscalltbl__name(const struct syscalltbl *tbl, int id);
 int syscalltbl__id(struct syscalltbl *tbl, const char *name);
 
+int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx);
+int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx);
+
 #endif /* __PERF_SYSCALLTBL_H */
diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
index 82a2ff896a95..52a39ecf5ca1 100644
--- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
+++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
@@ -759,7 +759,7 @@ static acpi_status osl_list_bios_tables(void)
 
 		/* Skip NULL entries in RSDT/XSDT */
 
-		if (!table_address) {
+		if (table_address == 0) {
 			continue;
 		}
 
@@ -808,7 +808,8 @@ osl_get_bios_table(char *signature,
 	u8 number_of_tables;
 	u8 item_size;
 	u32 current_instance = 0;
-	acpi_physical_address table_address = 0;
+	acpi_physical_address table_address;
+	acpi_physical_address first_table_address = 0;
 	u32 table_length = 0;
 	acpi_status status = AE_OK;
 	u32 i;
@@ -820,9 +821,10 @@ osl_get_bios_table(char *signature,
 	    ACPI_COMPARE_NAME(signature, ACPI_SIG_XSDT) ||
 	    ACPI_COMPARE_NAME(signature, ACPI_SIG_DSDT) ||
 	    ACPI_COMPARE_NAME(signature, ACPI_SIG_FACS)) {
-		if (instance > 0) {
-			return (AE_LIMIT);
-		}
+
+find_next_instance:
+
+		table_address = 0;
 
 		/*
 		 * Get the appropriate address, either 32-bit or 64-bit. Be very
@@ -830,41 +832,66 @@ osl_get_bios_table(char *signature,
 		 * Note: The 64-bit addresses have priority.
 		 */
 		if (ACPI_COMPARE_NAME(signature, ACPI_SIG_DSDT)) {
-			if ((gbl_fadt->header.length >= MIN_FADT_FOR_XDSDT) &&
-			    gbl_fadt->Xdsdt) {
-				table_address =
-				    (acpi_physical_address)gbl_fadt->Xdsdt;
-			} else
-			    if ((gbl_fadt->header.length >= MIN_FADT_FOR_DSDT)
-				&& gbl_fadt->dsdt) {
-				table_address =
-				    (acpi_physical_address)gbl_fadt->dsdt;
+			if (current_instance < 2) {
+				if ((gbl_fadt->header.length >=
+				     MIN_FADT_FOR_XDSDT) && gbl_fadt->Xdsdt
+				    && current_instance == 0) {
+					table_address =
+					    (acpi_physical_address)gbl_fadt->
+					    Xdsdt;
+				} else
+				    if ((gbl_fadt->header.length >=
+					 MIN_FADT_FOR_DSDT)
+					&& gbl_fadt->dsdt !=
+					first_table_address) {
+					table_address =
+					    (acpi_physical_address)gbl_fadt->
+					    dsdt;
+				}
 			}
 		} else if (ACPI_COMPARE_NAME(signature, ACPI_SIG_FACS)) {
-			if ((gbl_fadt->header.length >= MIN_FADT_FOR_XFACS) &&
-			    gbl_fadt->Xfacs) {
-				table_address =
-				    (acpi_physical_address)gbl_fadt->Xfacs;
-			} else
-			    if ((gbl_fadt->header.length >= MIN_FADT_FOR_FACS)
-				&& gbl_fadt->facs) {
-				table_address =
-				    (acpi_physical_address)gbl_fadt->facs;
+			if (current_instance < 2) {
+				if ((gbl_fadt->header.length >=
+				     MIN_FADT_FOR_XFACS) && gbl_fadt->Xfacs
+				    && current_instance == 0) {
+					table_address =
+					    (acpi_physical_address)gbl_fadt->
+					    Xfacs;
+				} else
+				    if ((gbl_fadt->header.length >=
+					 MIN_FADT_FOR_FACS)
+					&& gbl_fadt->facs !=
+					first_table_address) {
+					table_address =
+					    (acpi_physical_address)gbl_fadt->
+					    facs;
+				}
 			}
 		} else if (ACPI_COMPARE_NAME(signature, ACPI_SIG_XSDT)) {
 			if (!gbl_revision) {
 				return (AE_BAD_SIGNATURE);
 			}
-			table_address =
-			    (acpi_physical_address)gbl_rsdp.
-			    xsdt_physical_address;
+			if (current_instance == 0) {
+				table_address =
+				    (acpi_physical_address)gbl_rsdp.
+				    xsdt_physical_address;
+			}
 		} else if (ACPI_COMPARE_NAME(signature, ACPI_SIG_RSDT)) {
-			table_address =
-			    (acpi_physical_address)gbl_rsdp.
-			    rsdt_physical_address;
+			if (current_instance == 0) {
+				table_address =
+				    (acpi_physical_address)gbl_rsdp.
+				    rsdt_physical_address;
+			}
 		} else {
-			table_address = (acpi_physical_address)gbl_rsdp_address;
-			signature = ACPI_SIG_RSDP;
+			if (current_instance == 0) {
+				table_address =
+				    (acpi_physical_address)gbl_rsdp_address;
+				signature = ACPI_SIG_RSDP;
+			}
+		}
+
+		if (table_address == 0) {
+			goto exit_find_table;
 		}
 
 		/* Now we can get the requested special table */
@@ -875,6 +902,18 @@ osl_get_bios_table(char *signature,
 		}
 
 		table_length = ap_get_table_length(mapped_table);
+		if (first_table_address == 0) {
+			first_table_address = table_address;
+		}
+
+		/* Match table instance */
+
+		if (current_instance != instance) {
+			osl_unmap_table(mapped_table);
+			mapped_table = NULL;
+			current_instance++;
+			goto find_next_instance;
+		}
 	} else {		/* Case for a normal ACPI table */
 
 		if (osl_can_use_xsdt()) {
@@ -913,7 +952,7 @@ osl_get_bios_table(char *signature,
 
 			/* Skip NULL entries in RSDT/XSDT */
 
-			if (!table_address) {
+			if (table_address == 0) {
 				continue;
 			}
 
@@ -946,6 +985,8 @@ osl_get_bios_table(char *signature,
 		}
 	}
 
+exit_find_table:
+
 	if (!mapped_table) {
 		return (AE_LIMIT);
 	}
diff --git a/tools/power/acpi/tools/acpidump/apfiles.c b/tools/power/acpi/tools/acpidump/apfiles.c
index 31b5a7f74015..d686e11936c4 100644
--- a/tools/power/acpi/tools/acpidump/apfiles.c
+++ b/tools/power/acpi/tools/acpidump/apfiles.c
@@ -61,7 +61,7 @@ static int ap_is_existing_file(char *pathname);
 
 static int ap_is_existing_file(char *pathname)
 {
-#ifndef _GNU_EFI
+#if !defined(_GNU_EFI) && !defined(_EDK2_EFI)
 	struct stat stat_info;
 
 	if (!stat(pathname, &stat_info)) {
diff --git a/tools/power/acpi/tools/acpidump/apmain.c b/tools/power/acpi/tools/acpidump/apmain.c
index dd82afa897bd..943b6b614683 100644
--- a/tools/power/acpi/tools/acpidump/apmain.c
+++ b/tools/power/acpi/tools/acpidump/apmain.c
@@ -300,7 +300,7 @@ static int ap_do_options(int argc, char **argv)
  *
  ******************************************************************************/
 
-#ifndef _GNU_EFI
+#if !defined(_GNU_EFI) && !defined(_EDK2_EFI)
 int ACPI_SYSTEM_XFACE main(int argc, char *argv[])
 #else
 int ACPI_SYSTEM_XFACE acpi_main(int argc, char *argv[])
diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile
index d6e1c02ddcfe..4c5a481a850c 100644
--- a/tools/power/cpupower/Makefile
+++ b/tools/power/cpupower/Makefile
@@ -26,7 +26,7 @@ endif
 
 ifneq ($(OUTPUT),)
 # check that the output directory actually exists
-OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd)
+OUTDIR := $(realpath $(OUTPUT))
 $(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist))
 endif
 
diff --git a/tools/power/cpupower/utils/cpupower.c b/tools/power/cpupower/utils/cpupower.c
index 9ea914378985..2dccf4998599 100644
--- a/tools/power/cpupower/utils/cpupower.c
+++ b/tools/power/cpupower/utils/cpupower.c
@@ -12,6 +12,7 @@
 #include <string.h>
 #include <unistd.h>
 #include <errno.h>
+#include <sched.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/utsname.h>
@@ -31,6 +32,7 @@ static int cmd_help(int argc, const char **argv);
  */
 struct cpupower_cpu_info cpupower_cpu_info;
 int run_as_root;
+int base_cpu;
 /* Affected cpus chosen by -c/--cpu param */
 struct bitmask *cpus_chosen;
 
@@ -174,6 +176,7 @@ int main(int argc, const char *argv[])
 	unsigned int i, ret;
 	struct stat statbuf;
 	struct utsname uts;
+	char pathname[32];
 
 	cpus_chosen = bitmask_alloc(sysconf(_SC_NPROCESSORS_CONF));
 
@@ -198,17 +201,23 @@ int main(int argc, const char *argv[])
 		argv[0] = cmd = "help";
 	}
 
-	get_cpu_info(0, &cpupower_cpu_info);
+	base_cpu = sched_getcpu();
+	if (base_cpu < 0) {
+		fprintf(stderr, _("No valid cpus found.\n"));
+		return EXIT_FAILURE;
+	}
+
+	get_cpu_info(&cpupower_cpu_info);
 	run_as_root = !geteuid();
 	if (run_as_root) {
 		ret = uname(&uts);
+		sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
 		if (!ret && !strcmp(uts.machine, "x86_64") &&
-		    stat("/dev/cpu/0/msr", &statbuf) != 0) {
+		    stat(pathname, &statbuf) != 0) {
 			if (system("modprobe msr") == -1)
 	fprintf(stderr, _("MSR access not available.\n"));
 		}
 	}
-		
 
 	for (i = 0; i < ARRAY_SIZE(commands); i++) {
 		struct cmd_struct *p = commands + i;
diff --git a/tools/power/cpupower/utils/helpers/cpuid.c b/tools/power/cpupower/utils/helpers/cpuid.c
index 39c2c7d067bb..32d37c9be791 100644
--- a/tools/power/cpupower/utils/helpers/cpuid.c
+++ b/tools/power/cpupower/utils/helpers/cpuid.c
@@ -42,7 +42,7 @@ cpuid_func(edx);
  *
  * TBD: Should there be a cpuid alternative for this if /proc is not mounted?
  */
-int get_cpu_info(unsigned int cpu, struct cpupower_cpu_info *cpu_info)
+int get_cpu_info(struct cpupower_cpu_info *cpu_info)
 {
 	FILE *fp;
 	char value[64];
@@ -70,7 +70,7 @@ int get_cpu_info(unsigned int cpu, struct cpupower_cpu_info *cpu_info)
 		if (!strncmp(value, "processor\t: ", 12))
 			sscanf(value, "processor\t: %u", &proc);
 
-		if (proc != cpu)
+		if (proc != (unsigned int)base_cpu)
 			continue;
 
 		/* Get CPU vendor */
diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h
index 799a18be60aa..41da392be448 100644
--- a/tools/power/cpupower/utils/helpers/helpers.h
+++ b/tools/power/cpupower/utils/helpers/helpers.h
@@ -34,6 +34,7 @@
 /* Internationalization ****************************/
 
 extern int run_as_root;
+extern int base_cpu;
 extern struct bitmask *cpus_chosen;
 
 /* Global verbose (-d) stuff *********************************/
@@ -87,11 +88,11 @@ struct cpupower_cpu_info {
  *
  * Extract CPU vendor, family, model, stepping info from /proc/cpuinfo
  *
- * Returns 0 on success or a negativ error code
+ * Returns 0 on success or a negative error code
  * Only used on x86, below global's struct values are zero/unknown on
  * other archs
  */
-extern int get_cpu_info(unsigned int cpu, struct cpupower_cpu_info *cpu_info);
+extern int get_cpu_info(struct cpupower_cpu_info *cpu_info);
 extern struct cpupower_cpu_info cpupower_cpu_info;
 /* cpuid and cpuinfo helpers  **************************/
 
diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c
index 601d719d4e60..a5e7ddf19dbd 100644
--- a/tools/power/cpupower/utils/helpers/misc.c
+++ b/tools/power/cpupower/utils/helpers/misc.c
@@ -13,7 +13,7 @@ int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active,
 
 	*support = *active = *states = 0;
 
-	ret = get_cpu_info(0, &cpu_info);
+	ret = get_cpu_info(&cpu_info);
 	if (ret)
 		return ret;
 
diff --git a/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c
index ebeaba6571a3..f794d6bbb7e9 100644
--- a/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c
+++ b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c
@@ -123,7 +123,7 @@ static int hsw_ext_start(void)
 			previous_count[num][cpu] = val;
 		}
 	}
-	hsw_ext_get_count(TSC, &tsc_at_measure_start, 0);
+	hsw_ext_get_count(TSC, &tsc_at_measure_start, base_cpu);
 	return 0;
 }
 
@@ -132,7 +132,7 @@ static int hsw_ext_stop(void)
 	unsigned long long val;
 	int num, cpu;
 
-	hsw_ext_get_count(TSC, &tsc_at_measure_end, 0);
+	hsw_ext_get_count(TSC, &tsc_at_measure_end, base_cpu);
 
 	for (num = 0; num < HSW_EXT_CSTATE_COUNT; num++) {
 		for (cpu = 0; cpu < cpu_count; cpu++) {
diff --git a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
index c83f1606970b..d7c2a6d13dea 100644
--- a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
+++ b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
@@ -80,7 +80,8 @@ static int *is_valid;
 static int mperf_get_tsc(unsigned long long *tsc)
 {
 	int ret;
-	ret = read_msr(0, MSR_TSC, tsc);
+
+	ret = read_msr(base_cpu, MSR_TSC, tsc);
 	if (ret)
 		dprint("Reading TSC MSR failed, returning %llu\n", *tsc);
 	return ret;
diff --git a/tools/power/cpupower/utils/idle_monitor/nhm_idle.c b/tools/power/cpupower/utils/idle_monitor/nhm_idle.c
index d2a91dd0d563..abf8cb5f7349 100644
--- a/tools/power/cpupower/utils/idle_monitor/nhm_idle.c
+++ b/tools/power/cpupower/utils/idle_monitor/nhm_idle.c
@@ -129,7 +129,7 @@ static int nhm_start(void)
 	int num, cpu;
 	unsigned long long dbg, val;
 
-	nhm_get_count(TSC, &tsc_at_measure_start, 0);
+	nhm_get_count(TSC, &tsc_at_measure_start, base_cpu);
 
 	for (num = 0; num < NHM_CSTATE_COUNT; num++) {
 		for (cpu = 0; cpu < cpu_count; cpu++) {
@@ -137,7 +137,7 @@ static int nhm_start(void)
 			previous_count[num][cpu] = val;
 		}
 	}
-	nhm_get_count(TSC, &dbg, 0);
+	nhm_get_count(TSC, &dbg, base_cpu);
 	dprint("TSC diff: %llu\n", dbg - tsc_at_measure_start);
 	return 0;
 }
@@ -148,7 +148,7 @@ static int nhm_stop(void)
 	unsigned long long dbg;
 	int num, cpu;
 
-	nhm_get_count(TSC, &tsc_at_measure_end, 0);
+	nhm_get_count(TSC, &tsc_at_measure_end, base_cpu);
 
 	for (num = 0; num < NHM_CSTATE_COUNT; num++) {
 		for (cpu = 0; cpu < cpu_count; cpu++) {
@@ -156,7 +156,7 @@ static int nhm_stop(void)
 			current_count[num][cpu] = val;
 		}
 	}
-	nhm_get_count(TSC, &dbg, 0);
+	nhm_get_count(TSC, &dbg, base_cpu);
 	dprint("TSC diff: %llu\n", dbg - tsc_at_measure_end);
 
 	return 0;
diff --git a/tools/power/cpupower/utils/idle_monitor/snb_idle.c b/tools/power/cpupower/utils/idle_monitor/snb_idle.c
index efc8a69c9aba..a2b45219648d 100644
--- a/tools/power/cpupower/utils/idle_monitor/snb_idle.c
+++ b/tools/power/cpupower/utils/idle_monitor/snb_idle.c
@@ -120,7 +120,7 @@ static int snb_start(void)
 			previous_count[num][cpu] = val;
 		}
 	}
-	snb_get_count(TSC, &tsc_at_measure_start, 0);
+	snb_get_count(TSC, &tsc_at_measure_start, base_cpu);
 	return 0;
 }
 
@@ -129,7 +129,7 @@ static int snb_stop(void)
 	unsigned long long val;
 	int num, cpu;
 
-	snb_get_count(TSC, &tsc_at_measure_end, 0);
+	snb_get_count(TSC, &tsc_at_measure_end, base_cpu);
 
 	for (num = 0; num < SNB_CSTATE_COUNT; num++) {
 		for (cpu = 0; cpu < cpu_count; cpu++) {
diff --git a/tools/power/pm-graph/Makefile b/tools/power/pm-graph/Makefile
index 4d0ccc89e6c6..32f40eacdafe 100644
--- a/tools/power/pm-graph/Makefile
+++ b/tools/power/pm-graph/Makefile
@@ -4,7 +4,7 @@ DESTDIR		?=
 all:
 	@echo "Nothing to build"
 
-install :
+install : uninstall
 	install -d  $(DESTDIR)$(PREFIX)/lib/pm-graph
 	install analyze_suspend.py $(DESTDIR)$(PREFIX)/lib/pm-graph
 	install analyze_boot.py $(DESTDIR)$(PREFIX)/lib/pm-graph
@@ -17,12 +17,15 @@ install :
 	install sleepgraph.8 $(DESTDIR)$(PREFIX)/share/man/man8
 
 uninstall :
-	rm $(DESTDIR)$(PREFIX)/share/man/man8/bootgraph.8
-	rm $(DESTDIR)$(PREFIX)/share/man/man8/sleepgraph.8
+	rm -f $(DESTDIR)$(PREFIX)/share/man/man8/bootgraph.8
+	rm -f $(DESTDIR)$(PREFIX)/share/man/man8/sleepgraph.8
 
-	rm $(DESTDIR)$(PREFIX)/bin/bootgraph
-	rm $(DESTDIR)$(PREFIX)/bin/sleepgraph
+	rm -f $(DESTDIR)$(PREFIX)/bin/bootgraph
+	rm -f $(DESTDIR)$(PREFIX)/bin/sleepgraph
 
-	rm $(DESTDIR)$(PREFIX)/lib/pm-graph/analyze_boot.py
-	rm $(DESTDIR)$(PREFIX)/lib/pm-graph/analyze_suspend.py
-	rmdir $(DESTDIR)$(PREFIX)/lib/pm-graph
+	rm -f $(DESTDIR)$(PREFIX)/lib/pm-graph/analyze_boot.py
+	rm -f $(DESTDIR)$(PREFIX)/lib/pm-graph/analyze_suspend.py
+	rm -f $(DESTDIR)$(PREFIX)/lib/pm-graph/*.pyc
+	if [ -d $(DESTDIR)$(PREFIX)/lib/pm-graph ] ; then \
+		rmdir $(DESTDIR)$(PREFIX)/lib/pm-graph; \
+	fi;
diff --git a/tools/power/pm-graph/analyze_boot.py b/tools/power/pm-graph/analyze_boot.py
index 3e1dcbbf1adc..e83df141a597 100755
--- a/tools/power/pm-graph/analyze_boot.py
+++ b/tools/power/pm-graph/analyze_boot.py
@@ -42,7 +42,7 @@ import analyze_suspend as aslib
 #	 store system values and test parameters
 class SystemValues(aslib.SystemValues):
 	title = 'BootGraph'
-	version = 2.0
+	version = '2.1'
 	hostname = 'localhost'
 	testtime = ''
 	kernel = ''
@@ -50,9 +50,14 @@ class SystemValues(aslib.SystemValues):
 	ftracefile = ''
 	htmlfile = 'bootgraph.html'
 	outfile = ''
-	phoronix = False
-	addlogs = False
+	testdir = ''
+	testdirprefix = 'boot'
+	embedded = False
+	testlog = False
+	dmesglog = False
+	ftracelog = False
 	useftrace = False
+	usecallgraph = False
 	usedevsrc = True
 	suspendmode = 'boot'
 	max_graph_depth = 2
@@ -61,10 +66,12 @@ class SystemValues(aslib.SystemValues):
 	manual = False
 	iscronjob = False
 	timeformat = '%.6f'
+	bootloader = 'grub'
+	blexec = []
 	def __init__(self):
 		if('LOG_FILE' in os.environ and 'TEST_RESULTS_IDENTIFIER' in os.environ):
-			self.phoronix = True
-			self.addlogs = True
+			self.embedded = True
+			self.dmesglog = True
 			self.outfile = os.environ['LOG_FILE']
 			self.htmlfile = os.environ['LOG_FILE']
 		self.hostname = platform.node()
@@ -76,42 +83,80 @@ class SystemValues(aslib.SystemValues):
 			self.kernel = self.kernelVersion(val)
 		else:
 			self.kernel = 'unknown'
+		self.testdir = datetime.now().strftime('boot-%y%m%d-%H%M%S')
 	def kernelVersion(self, msg):
 		return msg.split()[2]
+	def checkFtraceKernelVersion(self):
+		val = tuple(map(int, self.kernel.split('-')[0].split('.')))
+		if val >= (4, 10, 0):
+			return True
+		return False
 	def kernelParams(self):
 		cmdline = 'initcall_debug log_buf_len=32M'
 		if self.useftrace:
-			cmdline += ' trace_buf_size=128M trace_clock=global '\
+			if self.cpucount > 0:
+				bs = min(self.memtotal / 2, 2*1024*1024) / self.cpucount
+			else:
+				bs = 131072
+			cmdline += ' trace_buf_size=%dK trace_clock=global '\
 			'trace_options=nooverwrite,funcgraph-abstime,funcgraph-cpu,'\
 			'funcgraph-duration,funcgraph-proc,funcgraph-tail,'\
 			'nofuncgraph-overhead,context-info,graph-time '\
 			'ftrace=function_graph '\
 			'ftrace_graph_max_depth=%d '\
 			'ftrace_graph_filter=%s' % \
-				(self.max_graph_depth, self.graph_filter)
+				(bs, self.max_graph_depth, self.graph_filter)
 		return cmdline
 	def setGraphFilter(self, val):
-		fp = open(self.tpath+'available_filter_functions')
-		master = fp.read().split('\n')
-		fp.close()
+		master = self.getBootFtraceFilterFunctions()
+		fs = ''
 		for i in val.split(','):
 			func = i.strip()
+			if func == '':
+				doError('badly formatted filter function string')
+			if '[' in func or ']' in func:
+				doError('loadable module functions not allowed - "%s"' % func)
+			if ' ' in func:
+				doError('spaces found in filter functions - "%s"' % func)
 			if func not in master:
 				doError('function "%s" not available for ftrace' % func)
-		self.graph_filter = val
+			if not fs:
+				fs = func
+			else:
+				fs += ','+func
+		if not fs:
+			doError('badly formatted filter function string')
+		self.graph_filter = fs
+	def getBootFtraceFilterFunctions(self):
+		self.rootCheck(True)
+		fp = open(self.tpath+'available_filter_functions')
+		fulllist = fp.read().split('\n')
+		fp.close()
+		list = []
+		for i in fulllist:
+			if not i or ' ' in i or '[' in i or ']' in i:
+				continue
+			list.append(i)
+		return list
+	def myCronJob(self, line):
+		if '@reboot' not in line:
+			return False
+		if 'bootgraph' in line or 'analyze_boot.py' in line or '-cronjob' in line:
+			return True
+		return False
 	def cronjobCmdString(self):
 		cmdline = '%s -cronjob' % os.path.abspath(sys.argv[0])
 		args = iter(sys.argv[1:])
 		for arg in args:
 			if arg in ['-h', '-v', '-cronjob', '-reboot']:
 				continue
-			elif arg in ['-o', '-dmesg', '-ftrace', '-filter']:
+			elif arg in ['-o', '-dmesg', '-ftrace', '-func']:
 				args.next()
 				continue
 			cmdline += ' '+arg
 		if self.graph_filter != 'do_one_initcall':
-			cmdline += ' -filter "%s"' % self.graph_filter
-		cmdline += ' -o "%s"' % os.path.abspath(self.htmlfile)
+			cmdline += ' -func "%s"' % self.graph_filter
+		cmdline += ' -o "%s"' % os.path.abspath(self.testdir)
 		return cmdline
 	def manualRebootRequired(self):
 		cmdline = self.kernelParams()
@@ -121,6 +166,39 @@ class SystemValues(aslib.SystemValues):
 		print '3. After reboot, re-run this tool with the same arguments but no command (w/o -reboot or -manual).\n'
 		print 'CMDLINE="%s"' % cmdline
 		sys.exit()
+	def getExec(self, cmd):
+		dirlist = ['/sbin', '/bin', '/usr/sbin', '/usr/bin',
+			'/usr/local/sbin', '/usr/local/bin']
+		for path in dirlist:
+			cmdfull = os.path.join(path, cmd)
+			if os.path.exists(cmdfull):
+				return cmdfull
+		return ''
+	def blGrub(self):
+		blcmd = ''
+		for cmd in ['update-grub', 'grub-mkconfig', 'grub2-mkconfig']:
+			if blcmd:
+				break
+			blcmd = self.getExec(cmd)
+		if not blcmd:
+			doError('[GRUB] missing update command')
+		if not os.path.exists('/etc/default/grub'):
+			doError('[GRUB] missing /etc/default/grub')
+		if 'grub2' in blcmd:
+			cfg = '/boot/grub2/grub.cfg'
+		else:
+			cfg = '/boot/grub/grub.cfg'
+		if not os.path.exists(cfg):
+			doError('[GRUB] missing %s' % cfg)
+		if 'update-grub' in blcmd:
+			self.blexec = [blcmd]
+		else:
+			self.blexec = [blcmd, '-o', cfg]
+	def getBootLoader(self):
+		if self.bootloader == 'grub':
+			self.blGrub()
+		else:
+			doError('unknown boot loader: %s' % self.bootloader)
 
 sysvals = SystemValues()
 
@@ -136,20 +214,23 @@ class Data(aslib.Data):
 	idstr = ''
 	html_device_id = 0
 	valid = False
-	initstart = 0.0
+	tUserMode = 0.0
 	boottime = ''
-	phases = ['boot']
+	phases = ['kernel', 'user']
 	do_one_initcall = False
 	def __init__(self, num):
 		self.testnumber = num
 		self.idstr = 'a'
 		self.dmesgtext = []
 		self.dmesg = {
-			'boot': {'list': dict(), 'start': -1.0, 'end': -1.0, 'row': 0, 'color': '#dddddd'}
+			'kernel': {'list': dict(), 'start': -1.0, 'end': -1.0, 'row': 0,
+				'order': 0, 'color': 'linear-gradient(to bottom, #fff, #bcf)'},
+			'user': {'list': dict(), 'start': -1.0, 'end': -1.0, 'row': 0,
+				'order': 1, 'color': '#fff'}
 		}
 	def deviceTopology(self):
 		return ''
-	def newAction(self, phase, name, start, end, ret, ulen):
+	def newAction(self, phase, name, pid, start, end, ret, ulen):
 		# new device callback for a specific phase
 		self.html_device_id += 1
 		devid = '%s%d' % (self.idstr, self.html_device_id)
@@ -163,41 +244,46 @@ class Data(aslib.Data):
 			name = '%s[%d]' % (origname, i)
 			i += 1
 		list[name] = {'name': name, 'start': start, 'end': end,
-			'pid': 0, 'length': length, 'row': 0, 'id': devid,
+			'pid': pid, 'length': length, 'row': 0, 'id': devid,
 			'ret': ret, 'ulen': ulen }
 		return name
-	def deviceMatch(self, cg):
+	def deviceMatch(self, pid, cg):
 		if cg.end - cg.start == 0:
 			return True
-		list = self.dmesg['boot']['list']
-		for devname in list:
-			dev = list[devname]
-			if cg.name == 'do_one_initcall':
-				if(cg.start <= dev['start'] and cg.end >= dev['end'] and dev['length'] > 0):
-					dev['ftrace'] = cg
-					self.do_one_initcall = True
-					return True
-			else:
-				if(cg.start > dev['start'] and cg.end < dev['end']):
-					if 'ftraces' not in dev:
-						dev['ftraces'] = []
-					dev['ftraces'].append(cg)
-					return True
+		for p in data.phases:
+			list = self.dmesg[p]['list']
+			for devname in list:
+				dev = list[devname]
+				if pid != dev['pid']:
+					continue
+				if cg.name == 'do_one_initcall':
+					if(cg.start <= dev['start'] and cg.end >= dev['end'] and dev['length'] > 0):
+						dev['ftrace'] = cg
+						self.do_one_initcall = True
+						return True
+				else:
+					if(cg.start > dev['start'] and cg.end < dev['end']):
+						if 'ftraces' not in dev:
+							dev['ftraces'] = []
+						dev['ftraces'].append(cg)
+						return True
 		return False
 
 # ----------------- FUNCTIONS --------------------
 
-# Function: loadKernelLog
+# Function: parseKernelLog
 # Description:
-#	 Load a raw kernel log from dmesg
-def loadKernelLog():
+#	 parse a kernel log for boot data
+def parseKernelLog():
+	phase = 'kernel'
 	data = Data(0)
-	data.dmesg['boot']['start'] = data.start = ktime = 0.0
+	data.dmesg['kernel']['start'] = data.start = ktime = 0.0
 	sysvals.stamp = {
 		'time': datetime.now().strftime('%B %d %Y, %I:%M:%S %p'),
 		'host': sysvals.hostname,
 		'mode': 'boot', 'kernel': ''}
 
+	tp = aslib.TestProps()
 	devtemp = dict()
 	if(sysvals.dmesgfile):
 		lf = open(sysvals.dmesgfile, 'r')
@@ -205,6 +291,13 @@ def loadKernelLog():
 		lf = Popen('dmesg', stdout=PIPE).stdout
 	for line in lf:
 		line = line.replace('\r\n', '')
+		# grab the stamp and sysinfo
+		if re.match(tp.stampfmt, line):
+			tp.stamp = line
+			continue
+		elif re.match(tp.sysinfofmt, line):
+			tp.sysinfo = line
+			continue
 		idx = line.find('[')
 		if idx > 1:
 			line = line[idx:]
@@ -215,7 +308,6 @@ def loadKernelLog():
 		if(ktime > 120):
 			break
 		msg = m.group('msg')
-		data.end = data.initstart = ktime
 		data.dmesgtext.append(line)
 		if(ktime == 0.0 and re.match('^Linux version .*', msg)):
 			if(not sysvals.stamp['kernel']):
@@ -228,43 +320,39 @@ def loadKernelLog():
 			data.boottime = bt.strftime('%Y-%m-%d_%H:%M:%S')
 			sysvals.stamp['time'] = bt.strftime('%B %d %Y, %I:%M:%S %p')
 			continue
-		m = re.match('^calling *(?P<f>.*)\+.*', msg)
+		m = re.match('^calling *(?P<f>.*)\+.* @ (?P<p>[0-9]*)', msg)
 		if(m):
-			devtemp[m.group('f')] = ktime
+			func = m.group('f')
+			pid = int(m.group('p'))
+			devtemp[func] = (ktime, pid)
 			continue
 		m = re.match('^initcall *(?P<f>.*)\+.* returned (?P<r>.*) after (?P<t>.*) usecs', msg)
 		if(m):
 			data.valid = True
+			data.end = ktime
 			f, r, t = m.group('f', 'r', 't')
 			if(f in devtemp):
-				data.newAction('boot', f, devtemp[f], ktime, int(r), int(t))
-				data.end = ktime
+				start, pid = devtemp[f]
+				data.newAction(phase, f, pid, start, ktime, int(r), int(t))
 				del devtemp[f]
 			continue
 		if(re.match('^Freeing unused kernel memory.*', msg)):
-			break
-
-	data.dmesg['boot']['end'] = data.end
+			data.tUserMode = ktime
+			data.dmesg['kernel']['end'] = ktime
+			data.dmesg['user']['start'] = ktime
+			phase = 'user'
+
+	if tp.stamp:
+		sysvals.stamp = 0
+		tp.parseStamp(data, sysvals)
+	data.dmesg['user']['end'] = data.end
 	lf.close()
 	return data
 
-# Function: loadTraceLog
+# Function: parseTraceLog
 # Description:
 #	 Check if trace is available and copy to a temp file
-def loadTraceLog(data):
-	# load the data to a temp file if none given
-	if not sysvals.ftracefile:
-		lib = aslib.sysvals
-		aslib.rootCheck(True)
-		if not lib.verifyFtrace():
-			doError('ftrace not available')
-		if lib.fgetVal('current_tracer').strip() != 'function_graph':
-			doError('ftrace not configured for a boot callgraph')
-		sysvals.ftracefile = '/tmp/boot_ftrace.%s.txt' % os.getpid()
-		call('cat '+lib.tpath+'trace > '+sysvals.ftracefile, shell=True)
-	if not sysvals.ftracefile:
-		doError('No trace data available')
-
+def parseTraceLog(data):
 	# parse the trace log
 	ftemp = dict()
 	tp = aslib.TestProps()
@@ -306,9 +394,29 @@ def loadTraceLog(data):
 				print('Sanity check failed for %s-%d' % (proc, pid))
 				continue
 			# match cg data to devices
-			if not data.deviceMatch(cg):
+			if not data.deviceMatch(pid, cg):
 				print ' BAD: %s %s-%d [%f - %f]' % (cg.name, proc, pid, cg.start, cg.end)
 
+# Function: retrieveLogs
+# Description:
+#	 Create copies of dmesg and/or ftrace for later processing
+def retrieveLogs():
+	# check ftrace is configured first
+	if sysvals.useftrace:
+		tracer = sysvals.fgetVal('current_tracer').strip()
+		if tracer != 'function_graph':
+			doError('ftrace not configured for a boot callgraph')
+	# create the folder and get dmesg
+	sysvals.systemInfo(aslib.dmidecode(sysvals.mempath))
+	sysvals.initTestOutput('boot')
+	sysvals.writeDatafileHeader(sysvals.dmesgfile)
+	call('dmesg >> '+sysvals.dmesgfile, shell=True)
+	if not sysvals.useftrace:
+		return
+	# get ftrace
+	sysvals.writeDatafileHeader(sysvals.ftracefile)
+	call('cat '+sysvals.tpath+'trace >> '+sysvals.ftracefile, shell=True)
+
 # Function: colorForName
 # Description:
 #	 Generate a repeatable color from a list for a given name
@@ -353,18 +461,19 @@ def cgOverview(cg, minlen):
 #	 testruns: array of Data objects from parseKernelLog or parseTraceLog
 # Output:
 #	 True if the html file was created, false if it failed
-def createBootGraph(data, embedded):
+def createBootGraph(data):
 	# html function templates
 	html_srccall = '<div id={6} title="{5}" class="srccall" style="left:{1}%;top:{2}px;height:{3}px;width:{4}%;line-height:{3}px;">{0}</div>\n'
 	html_timetotal = '<table class="time1">\n<tr>'\
-		'<td class="blue">Time from Kernel Boot to start of User Mode: <b>{0} ms</b></td>'\
+		'<td class="blue">Init process starts @ <b>{0} ms</b></td>'\
+		'<td class="blue">Last initcall ends @ <b>{1} ms</b></td>'\
 		'</tr>\n</table>\n'
 
 	# device timeline
 	devtl = aslib.Timeline(100, 20)
 
 	# write the test title and general info header
-	devtl.createHeader(sysvals, 'noftrace')
+	devtl.createHeader(sysvals)
 
 	# Generate the header for this timeline
 	t0 = data.start
@@ -373,84 +482,98 @@ def createBootGraph(data, embedded):
 	if(tTotal == 0):
 		print('ERROR: No timeline data')
 		return False
-	boot_time = '%.0f'%(tTotal*1000)
-	devtl.html += html_timetotal.format(boot_time)
+	user_mode = '%.0f'%(data.tUserMode*1000)
+	last_init = '%.0f'%(tTotal*1000)
+	devtl.html += html_timetotal.format(user_mode, last_init)
 
 	# determine the maximum number of rows we need to draw
-	phase = 'boot'
-	list = data.dmesg[phase]['list']
 	devlist = []
-	for devname in list:
-		d = aslib.DevItem(0, phase, list[devname])
-		devlist.append(d)
-	devtl.getPhaseRows(devlist)
+	for p in data.phases:
+		list = data.dmesg[p]['list']
+		for devname in list:
+			d = aslib.DevItem(0, p, list[devname])
+			devlist.append(d)
+		devtl.getPhaseRows(devlist, 0, 'start')
 	devtl.calcTotalRows()
 
 	# draw the timeline background
 	devtl.createZoomBox()
-	boot = data.dmesg[phase]
-	length = boot['end']-boot['start']
-	left = '%.3f' % (((boot['start']-t0)*100.0)/tTotal)
-	width = '%.3f' % ((length*100.0)/tTotal)
-	devtl.html += devtl.html_tblock.format(phase, left, width, devtl.scaleH)
-	devtl.html += devtl.html_phase.format('0', '100', \
-		'%.3f'%devtl.scaleH, '%.3f'%devtl.bodyH, \
-		'white', '')
+	devtl.html += devtl.html_tblock.format('boot', '0', '100', devtl.scaleH)
+	for p in data.phases:
+		phase = data.dmesg[p]
+		length = phase['end']-phase['start']
+		left = '%.3f' % (((phase['start']-t0)*100.0)/tTotal)
+		width = '%.3f' % ((length*100.0)/tTotal)
+		devtl.html += devtl.html_phase.format(left, width, \
+			'%.3f'%devtl.scaleH, '%.3f'%devtl.bodyH, \
+			phase['color'], '')
 
 	# draw the device timeline
 	num = 0
 	devstats = dict()
-	for devname in sorted(list):
-		cls, color = colorForName(devname)
-		dev = list[devname]
-		info = '@|%.3f|%.3f|%.3f|%d' % (dev['start']*1000.0, dev['end']*1000.0,
-			dev['ulen']/1000.0, dev['ret'])
-		devstats[dev['id']] = {'info':info}
-		dev['color'] = color
-		height = devtl.phaseRowHeight(0, phase, dev['row'])
-		top = '%.6f' % ((dev['row']*height) + devtl.scaleH)
-		left = '%.6f' % (((dev['start']-t0)*100)/tTotal)
-		width = '%.6f' % (((dev['end']-dev['start'])*100)/tTotal)
-		length = ' (%0.3f ms) ' % ((dev['end']-dev['start'])*1000)
-		devtl.html += devtl.html_device.format(dev['id'],
-			devname+length+'kernel_mode', left, top, '%.3f'%height,
-			width, devname, ' '+cls, '')
-		rowtop = devtl.phaseRowTop(0, phase, dev['row'])
-		height = '%.6f' % (devtl.rowH / 2)
-		top = '%.6f' % (rowtop + devtl.scaleH + (devtl.rowH / 2))
-		if data.do_one_initcall:
-			if('ftrace' not in dev):
+	for phase in data.phases:
+		list = data.dmesg[phase]['list']
+		for devname in sorted(list):
+			cls, color = colorForName(devname)
+			dev = list[devname]
+			info = '@|%.3f|%.3f|%.3f|%d' % (dev['start']*1000.0, dev['end']*1000.0,
+				dev['ulen']/1000.0, dev['ret'])
+			devstats[dev['id']] = {'info':info}
+			dev['color'] = color
+			height = devtl.phaseRowHeight(0, phase, dev['row'])
+			top = '%.6f' % ((dev['row']*height) + devtl.scaleH)
+			left = '%.6f' % (((dev['start']-t0)*100)/tTotal)
+			width = '%.6f' % (((dev['end']-dev['start'])*100)/tTotal)
+			length = ' (%0.3f ms) ' % ((dev['end']-dev['start'])*1000)
+			devtl.html += devtl.html_device.format(dev['id'],
+				devname+length+phase+'_mode', left, top, '%.3f'%height,
+				width, devname, ' '+cls, '')
+			rowtop = devtl.phaseRowTop(0, phase, dev['row'])
+			height = '%.6f' % (devtl.rowH / 2)
+			top = '%.6f' % (rowtop + devtl.scaleH + (devtl.rowH / 2))
+			if data.do_one_initcall:
+				if('ftrace' not in dev):
+					continue
+				cg = dev['ftrace']
+				large, stats = cgOverview(cg, 0.001)
+				devstats[dev['id']]['fstat'] = stats
+				for l in large:
+					left = '%f' % (((l.time-t0)*100)/tTotal)
+					width = '%f' % (l.length*100/tTotal)
+					title = '%s (%0.3fms)' % (l.name, l.length * 1000.0)
+					devtl.html += html_srccall.format(l.name, left,
+						top, height, width, title, 'x%d'%num)
+					num += 1
+				continue
+			if('ftraces' not in dev):
 				continue
-			cg = dev['ftrace']
-			large, stats = cgOverview(cg, 0.001)
-			devstats[dev['id']]['fstat'] = stats
-			for l in large:
-				left = '%f' % (((l.time-t0)*100)/tTotal)
-				width = '%f' % (l.length*100/tTotal)
-				title = '%s (%0.3fms)' % (l.name, l.length * 1000.0)
-				devtl.html += html_srccall.format(l.name, left,
-					top, height, width, title, 'x%d'%num)
+			for cg in dev['ftraces']:
+				left = '%f' % (((cg.start-t0)*100)/tTotal)
+				width = '%f' % ((cg.end-cg.start)*100/tTotal)
+				cglen = (cg.end - cg.start) * 1000.0
+				title = '%s (%0.3fms)' % (cg.name, cglen)
+				cg.id = 'x%d' % num
+				devtl.html += html_srccall.format(cg.name, left,
+					top, height, width, title, dev['id']+cg.id)
 				num += 1
-			continue
-		if('ftraces' not in dev):
-			continue
-		for cg in dev['ftraces']:
-			left = '%f' % (((cg.start-t0)*100)/tTotal)
-			width = '%f' % ((cg.end-cg.start)*100/tTotal)
-			cglen = (cg.end - cg.start) * 1000.0
-			title = '%s (%0.3fms)' % (cg.name, cglen)
-			cg.id = 'x%d' % num
-			devtl.html += html_srccall.format(cg.name, left,
-				top, height, width, title, dev['id']+cg.id)
-			num += 1
 
 	# draw the time scale, try to make the number of labels readable
-	devtl.createTimeScale(t0, tMax, tTotal, phase)
+	devtl.createTimeScale(t0, tMax, tTotal, 'boot')
 	devtl.html += '</div>\n'
 
 	# timeline is finished
 	devtl.html += '</div>\n</div>\n'
 
+	# draw a legend which describes the phases by color
+	devtl.html += '<div class="legend">\n'
+	pdelta = 20.0
+	pmargin = 36.0
+	for phase in data.phases:
+		order = '%.2f' % ((data.dmesg[phase]['order'] * pdelta) + pmargin)
+		devtl.html += devtl.html_legend.format(order, \
+			data.dmesg[phase]['color'], phase+'_mode', phase[0])
+	devtl.html += '</div>\n'
+
 	if(sysvals.outfile == sysvals.htmlfile):
 		hf = open(sysvals.htmlfile, 'a')
 	else:
@@ -474,7 +597,7 @@ def createBootGraph(data, embedded):
 		.fstat td {text-align:left;width:35px;}\n\
 		.srccall {position:absolute;font-size:10px;z-index:7;overflow:hidden;color:black;text-align:center;white-space:nowrap;border-radius:5px;border:1px solid black;background:linear-gradient(to bottom right,#CCC,#969696);}\n\
 		.srccall:hover {color:white;font-weight:bold;border:1px solid white;}\n'
-	if(not embedded):
+	if(not sysvals.embedded):
 		aslib.addCSS(hf, sysvals, 1, False, extra)
 
 	# write the device timeline
@@ -495,9 +618,11 @@ def createBootGraph(data, embedded):
 	html = \
 		'<div id="devicedetailtitle"></div>\n'\
 		'<div id="devicedetail" style="display:none;">\n'\
-		'<div id="devicedetail0">\n'\
-		'<div id="kernel_mode" class="phaselet" style="left:0%;width:100%;background:#DDDDDD"></div>\n'\
-		'</div>\n</div>\n'\
+		'<div id="devicedetail0">\n'
+	for p in data.phases:
+		phase = data.dmesg[p]
+		html += devtl.html_phaselet.format(p+'_mode', '0', '100', phase['color'])
+	html += '</div>\n</div>\n'\
 		'<script type="text/javascript">\n'+statinfo+\
 		'</script>\n'
 	hf.write(html)
@@ -507,21 +632,21 @@ def createBootGraph(data, embedded):
 		aslib.addCallgraphs(sysvals, hf, data)
 
 	# add the dmesg log as a hidden div
-	if sysvals.addlogs:
+	if sysvals.dmesglog:
 		hf.write('<div id="dmesglog" style="display:none;">\n')
 		for line in data.dmesgtext:
 			line = line.replace('<', '&lt').replace('>', '&gt')
 			hf.write(line)
 		hf.write('</div>\n')
 
-	if(not embedded):
+	if(not sysvals.embedded):
 		# write the footer and close
 		aslib.addScriptCode(hf, [data])
 		hf.write('</body>\n</html>\n')
 	else:
 		# embedded out will be loaded in a page, skip the js
 		hf.write('<div id=bounds style=display:none>%f,%f</div>' % \
-			(data.start*1000, data.initstart*1000))
+			(data.start*1000, data.end*1000))
 	hf.close()
 	return True
 
@@ -533,17 +658,20 @@ def updateCron(restore=False):
 	if not restore:
 		sysvals.rootUser(True)
 	crondir = '/var/spool/cron/crontabs/'
-	cronfile = crondir+'root'
-	backfile = crondir+'root-analyze_boot-backup'
+	if not os.path.exists(crondir):
+		crondir = '/var/spool/cron/'
 	if not os.path.exists(crondir):
 		doError('%s not found' % crondir)
-	out = Popen(['which', 'crontab'], stdout=PIPE).stdout.read()
-	if not out:
+	cronfile = crondir+'root'
+	backfile = crondir+'root-analyze_boot-backup'
+	cmd = sysvals.getExec('crontab')
+	if not cmd:
 		doError('crontab not found')
 	# on restore: move the backup cron back into place
 	if restore:
 		if os.path.exists(backfile):
 			shutil.move(backfile, cronfile)
+			call([cmd, cronfile])
 		return
 	# backup current cron and install new one with reboot
 	if os.path.exists(cronfile):
@@ -556,13 +684,13 @@ def updateCron(restore=False):
 		fp = open(backfile, 'r')
 		op = open(cronfile, 'w')
 		for line in fp:
-			if '@reboot' not in line:
+			if not sysvals.myCronJob(line):
 				op.write(line)
 				continue
 		fp.close()
 		op.write('@reboot python %s\n' % sysvals.cronjobCmdString())
 		op.close()
-		res = call('crontab %s' % cronfile, shell=True)
+		res = call([cmd, cronfile])
 	except Exception, e:
 		print 'Exception: %s' % str(e)
 		shutil.move(backfile, cronfile)
@@ -577,25 +705,16 @@ def updateGrub(restore=False):
 	# call update-grub on restore
 	if restore:
 		try:
-			call(['update-grub'], stderr=PIPE, stdout=PIPE,
+			call(sysvals.blexec, stderr=PIPE, stdout=PIPE,
 				env={'PATH': '.:/sbin:/usr/sbin:/usr/bin:/sbin:/bin'})
 		except Exception, e:
 			print 'Exception: %s\n' % str(e)
 		return
-	# verify we can do this
-	sysvals.rootUser(True)
-	grubfile = '/etc/default/grub'
-	if not os.path.exists(grubfile):
-		print 'ERROR: Unable to set the kernel parameters via grub.\n'
-		sysvals.manualRebootRequired()
-	out = Popen(['which', 'update-grub'], stdout=PIPE).stdout.read()
-	if not out:
-		print 'ERROR: Unable to set the kernel parameters via grub.\n'
-		sysvals.manualRebootRequired()
-
 	# extract the option and create a grub config without it
+	sysvals.rootUser(True)
 	tgtopt = 'GRUB_CMDLINE_LINUX_DEFAULT'
 	cmdline = ''
+	grubfile = '/etc/default/grub'
 	tempfile = '/etc/default/grub.analyze_boot'
 	shutil.move(grubfile, tempfile)
 	res = -1
@@ -622,7 +741,7 @@ def updateGrub(restore=False):
 		# if the target option value is in quotes, strip them
 		sp = '"'
 		val = cmdline.strip()
-		if val[0] == '\'' or val[0] == '"':
+		if val and (val[0] == '\'' or val[0] == '"'):
 			sp = val[0]
 			val = val.strip(sp)
 		cmdline = val
@@ -633,7 +752,7 @@ def updateGrub(restore=False):
 		# write out the updated target option
 		op.write('\n%s=%s%s%s\n' % (tgtopt, sp, cmdline, sp))
 		op.close()
-		res = call('update-grub')
+		res = call(sysvals.blexec)
 		os.remove(grubfile)
 	except Exception, e:
 		print 'Exception: %s' % str(e)
@@ -641,10 +760,18 @@ def updateGrub(restore=False):
 	# cleanup
 	shutil.move(tempfile, grubfile)
 	if res != 0:
-		doError('update-grub failed')
+		doError('update grub failed')
 
-# Function: doError
+# Function: updateKernelParams
 # Description:
+#	 update boot conf for all kernels with our parameters
+def updateKernelParams(restore=False):
+	# find the boot loader
+	sysvals.getBootLoader()
+	if sysvals.bootloader == 'grub':
+		updateGrub(restore)
+
+# Function: doError Description:
 #	 generic error function for catastrphic failures
 # Arguments:
 #	 msg: the error message to print
@@ -660,7 +787,7 @@ def doError(msg, help=False):
 #	 print out the help text
 def printHelp():
 	print('')
-	print('%s v%.1f' % (sysvals.title, sysvals.version))
+	print('%s v%s' % (sysvals.title, sysvals.version))
 	print('Usage: bootgraph <options> <command>')
 	print('')
 	print('Description:')
@@ -669,13 +796,19 @@ def printHelp():
 	print('  the start of the init process.')
 	print('')
 	print('  If no specific command is given the tool reads the current dmesg')
-	print('  and/or ftrace log and outputs bootgraph.html')
+	print('  and/or ftrace log and creates a timeline')
+	print('')
+	print('  Generates output files in subdirectory: boot-yymmdd-HHMMSS')
+	print('   HTML output:                    <hostname>_boot.html')
+	print('   raw dmesg output:               <hostname>_boot_dmesg.txt')
+	print('   raw ftrace output:              <hostname>_boot_ftrace.txt')
 	print('')
 	print('Options:')
 	print('  -h            Print this help text')
 	print('  -v            Print the current tool version')
 	print('  -addlogs      Add the dmesg log to the html output')
-	print('  -o file       Html timeline name (default: bootgraph.html)')
+	print('  -o name       Overrides the output subdirectory name when running a new test')
+	print('                default: boot-{date}-{time}')
 	print(' [advanced]')
 	print('  -f            Use ftrace to add function detail (default: disabled)')
 	print('  -callgraph    Add callgraph detail, can be very large (default: disabled)')
@@ -683,13 +816,18 @@ def printHelp():
 	print('  -mincg ms     Discard all callgraphs shorter than ms milliseconds (e.g. 0.001 for us)')
 	print('  -timeprec N   Number of significant digits in timestamps (0:S, 3:ms, [6:us])')
 	print('  -expandcg     pre-expand the callgraph data in the html output (default: disabled)')
-	print('  -filter list  Limit ftrace to comma-delimited list of functions (default: do_one_initcall)')
-	print(' [commands]')
+	print('  -func list    Limit ftrace to comma-delimited list of functions (default: do_one_initcall)')
+	print('  -cgfilter S   Filter the callgraph output in the timeline')
+	print('  -bl name      Use the following boot loader for kernel params (default: grub)')
 	print('  -reboot       Reboot the machine automatically and generate a new timeline')
-	print('  -manual       Show the requirements to generate a new timeline manually')
-	print('  -dmesg file   Load a stored dmesg file (used with -ftrace)')
-	print('  -ftrace file  Load a stored ftrace file (used with -dmesg)')
+	print('  -manual       Show the steps to generate a new timeline manually (used with -reboot)')
+	print('')
+	print('Other commands:')
 	print('  -flistall     Print all functions capable of being captured in ftrace')
+	print('  -sysinfo      Print out system info extracted from BIOS')
+	print(' [redo]')
+	print('  -dmesg file   Create HTML output using dmesg input (used with -ftrace)')
+	print('  -ftrace file  Create HTML output using ftrace input (used with -dmesg)')
 	print('')
 	return True
 
@@ -698,14 +836,15 @@ def printHelp():
 if __name__ == '__main__':
 	# loop through the command line arguments
 	cmd = ''
-	simplecmds = ['-updategrub', '-flistall']
+	testrun = True
+	simplecmds = ['-sysinfo', '-kpupdate', '-flistall', '-checkbl']
 	args = iter(sys.argv[1:])
 	for arg in args:
 		if(arg == '-h'):
 			printHelp()
 			sys.exit()
 		elif(arg == '-v'):
-			print("Version %.1f" % sysvals.version)
+			print("Version %s" % sysvals.version)
 			sys.exit()
 		elif(arg in simplecmds):
 			cmd = arg[1:]
@@ -716,16 +855,32 @@ if __name__ == '__main__':
 			sysvals.usecallgraph = True
 		elif(arg == '-mincg'):
 			sysvals.mincglen = aslib.getArgFloat('-mincg', args, 0.0, 10000.0)
+		elif(arg == '-cgfilter'):
+			try:
+				val = args.next()
+			except:
+				doError('No callgraph functions supplied', True)
+			sysvals.setDeviceFilter(val)
+		elif(arg == '-bl'):
+			try:
+				val = args.next()
+			except:
+				doError('No boot loader name supplied', True)
+			if val.lower() not in ['grub']:
+				doError('Unknown boot loader: %s' % val, True)
+			sysvals.bootloader = val.lower()
 		elif(arg == '-timeprec'):
 			sysvals.setPrecision(aslib.getArgInt('-timeprec', args, 0, 6))
 		elif(arg == '-maxdepth'):
 			sysvals.max_graph_depth = aslib.getArgInt('-maxdepth', args, 0, 1000)
-		elif(arg == '-filter'):
+		elif(arg == '-func'):
 			try:
 				val = args.next()
 			except:
 				doError('No filter functions supplied', True)
-			aslib.rootCheck(True)
+			sysvals.useftrace = True
+			sysvals.usecallgraph = True
+			sysvals.rootCheck(True)
 			sysvals.setGraphFilter(val)
 		elif(arg == '-ftrace'):
 			try:
@@ -734,9 +889,10 @@ if __name__ == '__main__':
 				doError('No ftrace file supplied', True)
 			if(os.path.exists(val) == False):
 				doError('%s does not exist' % val)
+			testrun = False
 			sysvals.ftracefile = val
 		elif(arg == '-addlogs'):
-			sysvals.addlogs = True
+			sysvals.dmesglog = True
 		elif(arg == '-expandcg'):
 			sysvals.cgexp = True
 		elif(arg == '-dmesg'):
@@ -748,18 +904,15 @@ if __name__ == '__main__':
 				doError('%s does not exist' % val)
 			if(sysvals.htmlfile == val or sysvals.outfile == val):
 				doError('Output filename collision')
+			testrun = False
 			sysvals.dmesgfile = val
 		elif(arg == '-o'):
 			try:
 				val = args.next()
 			except:
-				doError('No HTML filename supplied', True)
-			if(sysvals.dmesgfile == val or sysvals.ftracefile == val):
-				doError('Output filename collision')
-			sysvals.htmlfile = val
+				doError('No subdirectory name supplied', True)
+			sysvals.testdir = sysvals.setOutputFolder(val)
 		elif(arg == '-reboot'):
-			if sysvals.iscronjob:
-				doError('-reboot and -cronjob are incompatible')
 			sysvals.reboot = True
 		elif(arg == '-manual'):
 			sysvals.reboot = True
@@ -767,58 +920,93 @@ if __name__ == '__main__':
 		# remaining options are only for cron job use
 		elif(arg == '-cronjob'):
 			sysvals.iscronjob = True
-			if sysvals.reboot:
-				doError('-reboot and -cronjob are incompatible')
 		else:
 			doError('Invalid argument: '+arg, True)
 
+	# compatibility errors and access checks
+	if(sysvals.iscronjob and (sysvals.reboot or \
+		sysvals.dmesgfile or sysvals.ftracefile or cmd)):
+		doError('-cronjob is meant for batch purposes only')
+	if(sysvals.reboot and (sysvals.dmesgfile or sysvals.ftracefile)):
+		doError('-reboot and -dmesg/-ftrace are incompatible')
+	if cmd or sysvals.reboot or sysvals.iscronjob or testrun:
+		sysvals.rootCheck(True)
+	if (testrun and sysvals.useftrace) or cmd == 'flistall':
+		if not sysvals.verifyFtrace():
+			doError('Ftrace is not properly enabled')
+
+	# run utility commands
+	sysvals.cpuInfo()
 	if cmd != '':
-		if cmd == 'updategrub':
-			updateGrub()
+		if cmd == 'kpupdate':
+			updateKernelParams()
 		elif cmd == 'flistall':
-			sysvals.getFtraceFilterFunctions(False)
+			for f in sysvals.getBootFtraceFilterFunctions():
+				print f
+		elif cmd == 'checkbl':
+			sysvals.getBootLoader()
+			print 'Boot Loader: %s\n%s' % (sysvals.bootloader, sysvals.blexec)
+		elif(cmd == 'sysinfo'):
+			sysvals.printSystemInfo()
 		sys.exit()
 
-	# update grub, setup a cronjob, and reboot
+	# reboot: update grub, setup a cronjob, and reboot
 	if sysvals.reboot:
+		if (sysvals.useftrace or sysvals.usecallgraph) and \
+			not sysvals.checkFtraceKernelVersion():
+			doError('Ftrace functionality requires kernel v4.10 or newer')
 		if not sysvals.manual:
-			updateGrub()
+			updateKernelParams()
 			updateCron()
 			call('reboot')
 		else:
 			sysvals.manualRebootRequired()
 		sys.exit()
 
-	# disable the cronjob
+	# cronjob: remove the cronjob, grub changes, and disable ftrace
 	if sysvals.iscronjob:
 		updateCron(True)
-		updateGrub(True)
+		updateKernelParams(True)
+		try:
+			sysvals.fsetVal('0', 'tracing_on')
+		except:
+			pass
 
-	data = loadKernelLog()
-	if sysvals.useftrace:
-		loadTraceLog(data)
-		if sysvals.iscronjob:
-			try:
-				sysvals.fsetVal('0', 'tracing_on')
-			except:
-				pass
+	# testrun: generate copies of the logs
+	if testrun:
+		retrieveLogs()
+	else:
+		sysvals.setOutputFile()
 
-	if(sysvals.outfile and sysvals.phoronix):
-		fp = open(sysvals.outfile, 'w')
-		fp.write('pass %s initstart %.3f end %.3f boot %s\n' %
-			(data.valid, data.initstart*1000, data.end*1000, data.boottime))
-		fp.close()
-	if(not data.valid):
-		if sysvals.dmesgfile:
+	# process the log data
+	if sysvals.dmesgfile:
+		data = parseKernelLog()
+		if(not data.valid):
 			doError('No initcall data found in %s' % sysvals.dmesgfile)
-		else:
-			doError('No initcall data found, is initcall_debug enabled?')
+		if sysvals.useftrace and sysvals.ftracefile:
+			parseTraceLog(data)
+	else:
+		doError('dmesg file required')
 
 	print('          Host: %s' % sysvals.hostname)
 	print('     Test time: %s' % sysvals.testtime)
 	print('     Boot time: %s' % data.boottime)
 	print('Kernel Version: %s' % sysvals.kernel)
 	print('  Kernel start: %.3f' % (data.start * 1000))
-	print('    init start: %.3f' % (data.initstart * 1000))
+	print('Usermode start: %.3f' % (data.tUserMode * 1000))
+	print('Last Init Call: %.3f' % (data.end * 1000))
+
+	# handle embedded output logs
+	if(sysvals.outfile and sysvals.embedded):
+		fp = open(sysvals.outfile, 'w')
+		fp.write('pass %s initstart %.3f end %.3f boot %s\n' %
+			(data.valid, data.tUserMode*1000, data.end*1000, data.boottime))
+		fp.close()
+
+	createBootGraph(data)
 
-	createBootGraph(data, sysvals.phoronix)
+	# if running as root, change output dir owner to sudo_user
+	if testrun and os.path.isdir(sysvals.testdir) and \
+		os.getuid() == 0 and 'SUDO_USER' in os.environ:
+		cmd = 'chown -R {0}:{0} {1} > /dev/null 2>&1'
+		call(cmd.format(os.environ['SUDO_USER'], sysvals.testdir), shell=True)
diff --git a/tools/power/pm-graph/analyze_suspend.py b/tools/power/pm-graph/analyze_suspend.py
index a9206e67fc1f..1b60fe203741 100755
--- a/tools/power/pm-graph/analyze_suspend.py
+++ b/tools/power/pm-graph/analyze_suspend.py
@@ -68,10 +68,12 @@ from subprocess import call, Popen, PIPE
 #	 store system values and test parameters
 class SystemValues:
 	title = 'SleepGraph'
-	version = '4.6'
+	version = '4.7'
 	ansi = False
 	verbose = False
-	addlogs = False
+	testlog = True
+	dmesglog = False
+	ftracelog = False
 	mindevlen = 0.0
 	mincglen = 0.0
 	cgphase = ''
@@ -79,10 +81,11 @@ class SystemValues:
 	max_graph_depth = 0
 	callloopmaxgap = 0.0001
 	callloopmaxlen = 0.005
+	cpucount = 0
+	memtotal = 204800
 	srgap = 0
 	cgexp = False
-	outdir = ''
-	testdir = '.'
+	testdir = ''
 	tpath = '/sys/kernel/debug/tracing/'
 	fpdtpath = '/sys/firmware/acpi/tables/FPDT'
 	epath = '/sys/kernel/debug/tracing/events/power/'
@@ -95,14 +98,17 @@ class SystemValues:
 	testcommand = ''
 	mempath = '/dev/mem'
 	powerfile = '/sys/power/state'
+	mempowerfile = '/sys/power/mem_sleep'
 	suspendmode = 'mem'
+	memmode = ''
 	hostname = 'localhost'
 	prefix = 'test'
 	teststamp = ''
+	sysstamp = ''
 	dmesgstart = 0.0
 	dmesgfile = ''
 	ftracefile = ''
-	htmlfile = ''
+	htmlfile = 'output.html'
 	embedded = False
 	rtcwake = True
 	rtcwaketime = 15
@@ -127,9 +133,6 @@ class SystemValues:
 	devpropfmt = '# Device Properties: .*'
 	tracertypefmt = '# tracer: (?P<t>.*)'
 	firmwarefmt = '# fwsuspend (?P<s>[0-9]*) fwresume (?P<r>[0-9]*)$'
-	stampfmt = '# suspend-(?P<m>[0-9]{2})(?P<d>[0-9]{2})(?P<y>[0-9]{2})-'+\
-				'(?P<H>[0-9]{2})(?P<M>[0-9]{2})(?P<S>[0-9]{2})'+\
-				' (?P<host>.*) (?P<mode>.*) (?P<kernel>.*)$'
 	tracefuncs = {
 		'sys_sync': dict(),
 		'pm_prepare_console': dict(),
@@ -218,7 +221,7 @@ class SystemValues:
 		# if this is a phoronix test run, set some default options
 		if('LOG_FILE' in os.environ and 'TEST_RESULTS_IDENTIFIER' in os.environ):
 			self.embedded = True
-			self.addlogs = True
+			self.dmesglog = self.ftracelog = True
 			self.htmlfile = os.environ['LOG_FILE']
 		self.archargs = 'args_'+platform.machine()
 		self.hostname = platform.node()
@@ -233,6 +236,13 @@ class SystemValues:
 			self.rtcpath = rtc
 		if (hasattr(sys.stdout, 'isatty') and sys.stdout.isatty()):
 			self.ansi = True
+		self.testdir = datetime.now().strftime('suspend-%y%m%d-%H%M%S')
+	def rootCheck(self, fatal=True):
+		if(os.access(self.powerfile, os.W_OK)):
+			return True
+		if fatal:
+			doError('This command requires sysfs mount and root access')
+		return False
 	def rootUser(self, fatal=False):
 		if 'USER' in os.environ and os.environ['USER'] == 'root':
 			return True
@@ -249,30 +259,60 @@ class SystemValues:
 		args['date'] = n.strftime('%y%m%d')
 		args['time'] = n.strftime('%H%M%S')
 		args['hostname'] = self.hostname
-		self.outdir = value.format(**args)
+		return value.format(**args)
 	def setOutputFile(self):
-		if((self.htmlfile == '') and (self.dmesgfile != '')):
+		if self.dmesgfile != '':
 			m = re.match('(?P<name>.*)_dmesg\.txt$', self.dmesgfile)
 			if(m):
 				self.htmlfile = m.group('name')+'.html'
-		if((self.htmlfile == '') and (self.ftracefile != '')):
+		if self.ftracefile != '':
 			m = re.match('(?P<name>.*)_ftrace\.txt$', self.ftracefile)
 			if(m):
 				self.htmlfile = m.group('name')+'.html'
-		if(self.htmlfile == ''):
-			self.htmlfile = 'output.html'
-	def initTestOutput(self, subdir, testpath=''):
+	def systemInfo(self, info):
+		p = c = m = b = ''
+		if 'baseboard-manufacturer' in info:
+			m = info['baseboard-manufacturer']
+		elif 'system-manufacturer' in info:
+			m = info['system-manufacturer']
+		if 'baseboard-product-name' in info:
+			p = info['baseboard-product-name']
+		elif 'system-product-name' in info:
+			p = info['system-product-name']
+		if 'processor-version' in info:
+			c = info['processor-version']
+		if 'bios-version' in info:
+			b = info['bios-version']
+		self.sysstamp = '# sysinfo | man:%s | plat:%s | cpu:%s | bios:%s | numcpu:%d | memsz:%d' % \
+			(m, p, c, b, self.cpucount, self.memtotal)
+	def printSystemInfo(self):
+		self.rootCheck(True)
+		out = dmidecode(self.mempath, True)
+		fmt = '%-24s: %s'
+		for name in sorted(out):
+			print fmt % (name, out[name])
+		print fmt % ('cpucount', ('%d' % self.cpucount))
+		print fmt % ('memtotal', ('%d kB' % self.memtotal))
+	def cpuInfo(self):
+		self.cpucount = 0
+		fp = open('/proc/cpuinfo', 'r')
+		for line in fp:
+			if re.match('^processor[ \t]*:[ \t]*[0-9]*', line):
+				self.cpucount += 1
+		fp.close()
+		fp = open('/proc/meminfo', 'r')
+		for line in fp:
+			m = re.match('^MemTotal:[ \t]*(?P<sz>[0-9]*) *kB', line)
+			if m:
+				self.memtotal = int(m.group('sz'))
+				break
+		fp.close()
+	def initTestOutput(self, name):
 		self.prefix = self.hostname
 		v = open('/proc/version', 'r').read().strip()
 		kver = string.split(v)[2]
-		n = datetime.now()
-		testtime = n.strftime('suspend-%m%d%y-%H%M%S')
-		if not testpath:
-			testpath = n.strftime('suspend-%y%m%d-%H%M%S')
-		if(subdir != "."):
-			self.testdir = subdir+"/"+testpath
-		else:
-			self.testdir = testpath
+		fmt = name+'-%m%d%y-%H%M%S'
+		testtime = datetime.now().strftime(fmt)
 		self.teststamp = \
 			'# '+testtime+' '+self.prefix+' '+self.suspendmode+' '+kver
 		if(self.embedded):
@@ -355,7 +395,7 @@ class SystemValues:
 				continue
 			self.tracefuncs[i] = dict()
 	def getFtraceFilterFunctions(self, current):
-		rootCheck(True)
+		self.rootCheck(True)
 		if not current:
 			call('cat '+self.tpath+'available_filter_functions', shell=True)
 			return
@@ -453,7 +493,7 @@ class SystemValues:
 		val += '\nr:%s_ret %s $retval\n' % (name, func)
 		return val
 	def addKprobes(self, output=False):
-		if len(sysvals.kprobes) < 1:
+		if len(self.kprobes) < 1:
 			return
 		if output:
 			print('    kprobe functions in this kernel:')
@@ -525,7 +565,7 @@ class SystemValues:
 			fp.flush()
 			fp.close()
 		except:
-			pass
+			return False
 		return True
 	def fgetVal(self, path):
 		file = self.tpath+path
@@ -566,9 +606,15 @@ class SystemValues:
 		self.cleanupFtrace()
 		# set the trace clock to global
 		self.fsetVal('global', 'trace_clock')
-		# set trace buffer to a huge value
 		self.fsetVal('nop', 'current_tracer')
-		self.fsetVal('131073', 'buffer_size_kb')
+		# set trace buffer to a huge value
+		if self.usecallgraph or self.usedevsrc:
+			tgtsize = min(self.memtotal / 2, 2*1024*1024)
+			maxbuf = '%d' % (tgtsize / max(1, self.cpucount))
+			if self.cpucount < 1 or not self.fsetVal(maxbuf, 'buffer_size_kb'):
+				self.fsetVal('131072', 'buffer_size_kb')
+		else:
+			self.fsetVal('16384', 'buffer_size_kb')
 		# go no further if this is just a status check
 		if testing:
 			return
@@ -641,6 +687,15 @@ class SystemValues:
 		if not self.ansi:
 			return str
 		return '\x1B[%d;40m%s\x1B[m' % (color, str)
+	def writeDatafileHeader(self, filename, fwdata=[]):
+		fp = open(filename, 'w')
+		fp.write(self.teststamp+'\n')
+		fp.write(self.sysstamp+'\n')
+		if(self.suspendmode == 'mem' or self.suspendmode == 'command'):
+			for fw in fwdata:
+				if(fw):
+					fp.write('# fwsuspend %u fwresume %u\n' % (fw[0], fw[1]))
+		fp.close()
 
 sysvals = SystemValues()
 suspendmodename = {
@@ -1008,6 +1063,12 @@ class Data:
 			else:
 				self.trimTime(self.tSuspended, \
 					self.tResumed-self.tSuspended, False)
+	def getTimeValues(self):
+		sktime = (self.dmesg['suspend_machine']['end'] - \
+			self.tKernSus) * 1000
+		rktime = (self.dmesg['resume_complete']['end'] - \
+			self.dmesg['resume_machine']['start']) * 1000
+		return (sktime, rktime)
 	def setPhase(self, phase, ktime, isbegin):
 		if(isbegin):
 			self.dmesg[phase]['start'] = ktime
@@ -1517,7 +1578,7 @@ class FTraceCallGraph:
 			prelinedep += 1
 		last = 0
 		lasttime = line.time
-		virtualfname = 'execution_misalignment'
+		virtualfname = 'missing_function_name'
 		if len(self.list) > 0:
 			last = self.list[-1]
 			lasttime = last.time
@@ -1773,24 +1834,30 @@ class Timeline:
 	html_device = '<div id="{0}" title="{1}" class="thread{7}" style="left:{2}%;top:{3}px;height:{4}px;width:{5}%;{8}">{6}</div>\n'
 	html_phase = '<div class="phase" style="left:{0}%;width:{1}%;top:{2}px;height:{3}px;background:{4}">{5}</div>\n'
 	html_phaselet = '<div id="{0}" class="phaselet" style="left:{1}%;width:{2}%;background:{3}"></div>\n'
+	html_legend = '<div id="p{3}" class="square" style="left:{0}%;background:{1}">&nbsp;{2}</div>\n'
 	def __init__(self, rowheight, scaleheight):
 		self.rowH = rowheight
 		self.scaleH = scaleheight
 		self.html = ''
-	def createHeader(self, sv, suppress=''):
+	def createHeader(self, sv):
 		if(not sv.stamp['time']):
 			return
 		self.html += '<div class="version"><a href="https://01.org/suspendresume">%s v%s</a></div>' \
 			% (sv.title, sv.version)
-		if sv.logmsg and 'log' not in suppress:
-			self.html += '<button id="showtest" class="logbtn">log</button>'
-		if sv.addlogs and 'dmesg' not in suppress:
-			self.html += '<button id="showdmesg" class="logbtn">dmesg</button>'
-		if sv.addlogs and sv.ftracefile and 'ftrace' not in suppress:
-			self.html += '<button id="showftrace" class="logbtn">ftrace</button>'
+		if sv.logmsg and sv.testlog:
+			self.html += '<button id="showtest" class="logbtn btnfmt">log</button>'
+		if sv.dmesglog:
+			self.html += '<button id="showdmesg" class="logbtn btnfmt">dmesg</button>'
+		if sv.ftracelog:
+			self.html += '<button id="showftrace" class="logbtn btnfmt">ftrace</button>'
 		headline_stamp = '<div class="stamp">{0} {1} {2} {3}</div>\n'
 		self.html += headline_stamp.format(sv.stamp['host'], sv.stamp['kernel'],
 			sv.stamp['mode'], sv.stamp['time'])
+		if 'man' in sv.stamp and 'plat' in sv.stamp and 'cpu' in sv.stamp:
+			headline_sysinfo = '<div class="stamp sysinfo">{0} {1} <i>with</i> {2}</div>\n'
+			self.html += headline_sysinfo.format(sv.stamp['man'],
+				sv.stamp['plat'], sv.stamp['cpu'])
+
 	# Function: getDeviceRows
 	# Description:
 	#    determine how may rows the device funcs will take
@@ -1839,7 +1906,7 @@ class Timeline:
 	#	 devlist: the list of devices/actions in a group of contiguous phases
 	# Output:
 	#	 The total number of rows needed to display this phase of the timeline
-	def getPhaseRows(self, devlist, row=0):
+	def getPhaseRows(self, devlist, row=0, sortby='length'):
 		# clear all rows and set them to undefined
 		remaining = len(devlist)
 		rowdata = dict()
@@ -1852,8 +1919,12 @@ class Timeline:
 			if tp not in myphases:
 				myphases.append(tp)
 			dev['row'] = -1
-			# sort by length 1st, then name 2nd
-			sortdict[item] = (float(dev['end']) - float(dev['start']), item.dev['name'])
+			if sortby == 'start':
+				# sort by start 1st, then length 2nd
+				sortdict[item] = (-1*float(dev['start']), float(dev['end']) - float(dev['start']))
+			else:
+				# sort by length 1st, then name 2nd
+				sortdict[item] = (float(dev['end']) - float(dev['start']), item.dev['name'])
 			if 'src' in dev:
 				dev['devrows'] = self.getDeviceRows(dev['src'])
 		# sort the devlist by length so that large items graph on top
@@ -1995,8 +2066,13 @@ class Timeline:
 #	 A list of values describing the properties of these test runs
 class TestProps:
 	stamp = ''
+	sysinfo = ''
 	S0i3 = False
 	fwdata = []
+	stampfmt = '# [a-z]*-(?P<m>[0-9]{2})(?P<d>[0-9]{2})(?P<y>[0-9]{2})-'+\
+				'(?P<H>[0-9]{2})(?P<M>[0-9]{2})(?P<S>[0-9]{2})'+\
+				' (?P<host>.*) (?P<mode>.*) (?P<kernel>.*)$'
+	sysinfofmt = '^# sysinfo .*'
 	ftrace_line_fmt_fg = \
 		'^ *(?P<time>[0-9\.]*) *\| *(?P<cpu>[0-9]*)\)'+\
 		' *(?P<proc>.*)-(?P<pid>[0-9]*) *\|'+\
@@ -2019,6 +2095,36 @@ class TestProps:
 			self.ftrace_line_fmt = self.ftrace_line_fmt_nop
 		else:
 			doError('Invalid tracer format: [%s]' % tracer)
+	def parseStamp(self, data, sv):
+		m = re.match(self.stampfmt, self.stamp)
+		data.stamp = {'time': '', 'host': '', 'mode': ''}
+		dt = datetime(int(m.group('y'))+2000, int(m.group('m')),
+			int(m.group('d')), int(m.group('H')), int(m.group('M')),
+			int(m.group('S')))
+		data.stamp['time'] = dt.strftime('%B %d %Y, %I:%M:%S %p')
+		data.stamp['host'] = m.group('host')
+		data.stamp['mode'] = m.group('mode')
+		data.stamp['kernel'] = m.group('kernel')
+		if re.match(self.sysinfofmt, self.sysinfo):
+			for f in self.sysinfo.split('|'):
+				if '#' in f:
+					continue
+				tmp = f.strip().split(':', 1)
+				key = tmp[0]
+				val = tmp[1]
+				data.stamp[key] = val
+		sv.hostname = data.stamp['host']
+		sv.suspendmode = data.stamp['mode']
+		if sv.suspendmode == 'command' and sv.ftracefile != '':
+			modes = ['on', 'freeze', 'standby', 'mem']
+			out = Popen(['grep', 'suspend_enter', sv.ftracefile],
+				stderr=PIPE, stdout=PIPE).stdout.read()
+			m = re.match('.* suspend_enter\[(?P<mode>.*)\]', out)
+			if m and m.group('mode') in ['1', '2', '3']:
+				sv.suspendmode = modes[int(m.group('mode'))]
+				data.stamp['mode'] = sv.suspendmode
+		if not sv.stamp:
+			sv.stamp = data.stamp
 
 # Class: TestRun
 # Description:
@@ -2090,35 +2196,6 @@ def vprint(msg):
 	if(sysvals.verbose):
 		print(msg)
 
-# Function: parseStamp
-# Description:
-#	 Pull in the stamp comment line from the data file(s),
-#	 create the stamp, and add it to the global sysvals object
-# Arguments:
-#	 m: the valid re.match output for the stamp line
-def parseStamp(line, data):
-	m = re.match(sysvals.stampfmt, line)
-	data.stamp = {'time': '', 'host': '', 'mode': ''}
-	dt = datetime(int(m.group('y'))+2000, int(m.group('m')),
-		int(m.group('d')), int(m.group('H')), int(m.group('M')),
-		int(m.group('S')))
-	data.stamp['time'] = dt.strftime('%B %d %Y, %I:%M:%S %p')
-	data.stamp['host'] = m.group('host')
-	data.stamp['mode'] = m.group('mode')
-	data.stamp['kernel'] = m.group('kernel')
-	sysvals.hostname = data.stamp['host']
-	sysvals.suspendmode = data.stamp['mode']
-	if sysvals.suspendmode == 'command' and sysvals.ftracefile != '':
-		modes = ['on', 'freeze', 'standby', 'mem']
-		out = Popen(['grep', 'suspend_enter', sysvals.ftracefile],
-			stderr=PIPE, stdout=PIPE).stdout.read()
-		m = re.match('.* suspend_enter\[(?P<mode>.*)\]', out)
-		if m and m.group('mode') in ['1', '2', '3']:
-			sysvals.suspendmode = modes[int(m.group('mode'))]
-			data.stamp['mode'] = sysvals.suspendmode
-	if not sysvals.stamp:
-		sysvals.stamp = data.stamp
-
 # Function: doesTraceLogHaveTraceEvents
 # Description:
 #	 Quickly determine if the ftrace log has some or all of the trace events
@@ -2136,11 +2213,6 @@ def doesTraceLogHaveTraceEvents():
 		sysvals.usekprobes = True
 	out = Popen(['head', '-1', sysvals.ftracefile],
 		stderr=PIPE, stdout=PIPE).stdout.read().replace('\n', '')
-	m = re.match(sysvals.stampfmt, out)
-	if m and m.group('mode') == 'command':
-		sysvals.usetraceeventsonly = True
-		sysvals.usetraceevents = True
-		return
 	# figure out what level of trace events are supported
 	sysvals.usetraceeventsonly = True
 	sysvals.usetraceevents = False
@@ -2182,11 +2254,13 @@ def appendIncompleteTraceLog(testruns):
 	for line in tf:
 		# remove any latent carriage returns
 		line = line.replace('\r\n', '')
-		# grab the time stamp
-		m = re.match(sysvals.stampfmt, line)
-		if(m):
+		# grab the stamp and sysinfo
+		if re.match(tp.stampfmt, line):
 			tp.stamp = line
 			continue
+		elif re.match(tp.sysinfofmt, line):
+			tp.sysinfo = line
+			continue
 		# determine the trace data type (required for further parsing)
 		m = re.match(sysvals.tracertypefmt, line)
 		if(m):
@@ -2219,7 +2293,7 @@ def appendIncompleteTraceLog(testruns):
 		# look for the suspend start marker
 		if(t.startMarker()):
 			data = testrun[testidx].data
-			parseStamp(tp.stamp, data)
+			tp.parseStamp(data, sysvals)
 			data.setStart(t.time)
 			continue
 		if(not data):
@@ -2389,11 +2463,13 @@ def parseTraceLog():
 	for line in tf:
 		# remove any latent carriage returns
 		line = line.replace('\r\n', '')
-		# stamp line: each stamp means a new test run
-		m = re.match(sysvals.stampfmt, line)
-		if(m):
+		# stamp and sysinfo lines
+		if re.match(tp.stampfmt, line):
 			tp.stamp = line
 			continue
+		elif re.match(tp.sysinfofmt, line):
+			tp.sysinfo = line
+			continue
 		# firmware line: pull out any firmware data
 		m = re.match(sysvals.firmwarefmt, line)
 		if(m):
@@ -2439,7 +2515,7 @@ def parseTraceLog():
 			testdata.append(data)
 			testrun = TestRun(data)
 			testruns.append(testrun)
-			parseStamp(tp.stamp, data)
+			tp.parseStamp(data, sysvals)
 			data.setStart(t.time)
 			data.tKernSus = t.time
 			continue
@@ -2820,10 +2896,13 @@ def loadKernelLog(justtext=False):
 		idx = line.find('[')
 		if idx > 1:
 			line = line[idx:]
-		m = re.match(sysvals.stampfmt, line)
-		if(m):
+		# grab the stamp and sysinfo
+		if re.match(tp.stampfmt, line):
 			tp.stamp = line
 			continue
+		elif re.match(tp.sysinfofmt, line):
+			tp.sysinfo = line
+			continue
 		m = re.match(sysvals.firmwarefmt, line)
 		if(m):
 			tp.fwdata.append((int(m.group('s')), int(m.group('r'))))
@@ -2839,7 +2918,7 @@ def loadKernelLog(justtext=False):
 			if(data):
 				testruns.append(data)
 			data = Data(len(testruns))
-			parseStamp(tp.stamp, data)
+			tp.parseStamp(data, sysvals)
 			if len(tp.fwdata) > data.testnumber:
 				data.fwSuspend, data.fwResume = tp.fwdata[data.testnumber]
 				if(data.fwSuspend > 0 or data.fwResume > 0):
@@ -3170,6 +3249,8 @@ def addCallgraphs(sv, hf, data):
 			continue
 		list = data.dmesg[p]['list']
 		for devname in data.sortedDevices(p):
+			if len(sv.devicefilter) > 0 and devname not in sv.devicefilter:
+				continue
 			dev = list[devname]
 			color = 'white'
 			if 'color' in data.dmesg[p]:
@@ -3309,7 +3390,6 @@ def createHTML(testruns):
 	html_error = '<div id="{1}" title="kernel error/warning" class="err" style="right:{0}%">ERROR&rarr;</div>\n'
 	html_traceevent = '<div title="{0}" class="traceevent{6}" style="left:{1}%;top:{2}px;height:{3}px;width:{4}%;line-height:{3}px;{7}">{5}</div>\n'
 	html_cpuexec = '<div class="jiffie" style="left:{0}%;top:{1}px;height:{2}px;width:{3}%;background:{4};"></div>\n'
-	html_legend = '<div id="p{3}" class="square" style="left:{0}%;background:{1}">&nbsp;{2}</div>\n'
 	html_timetotal = '<table class="time1">\n<tr>'\
 		'<td class="green" title="{3}">{2} Suspend Time: <b>{0} ms</b></td>'\
 		'<td class="yellow" title="{4}">{2} Resume Time: <b>{1} ms</b></td>'\
@@ -3346,10 +3426,7 @@ def createHTML(testruns):
 	# Generate the header for this timeline
 	for data in testruns:
 		tTotal = data.end - data.start
-		sktime = (data.dmesg['suspend_machine']['end'] - \
-			data.tKernSus) * 1000
-		rktime = (data.dmesg['resume_complete']['end'] - \
-			data.dmesg['resume_machine']['start']) * 1000
+		sktime, rktime = data.getTimeValues()
 		if(tTotal == 0):
 			print('ERROR: No timeline data')
 			sys.exit()
@@ -3581,7 +3658,7 @@ def createHTML(testruns):
 				id += tmp[1][0]
 			order = '%.2f' % ((data.dmesg[phase]['order'] * pdelta) + pmargin)
 			name = string.replace(phase, '_', ' &nbsp;')
-			devtl.html += html_legend.format(order, \
+			devtl.html += devtl.html_legend.format(order, \
 				data.dmesg[phase]['color'], name, id)
 		devtl.html += '</div>\n'
 
@@ -3628,10 +3705,10 @@ def createHTML(testruns):
 		addCallgraphs(sysvals, hf, data)
 
 	# add the test log as a hidden div
-	if sysvals.logmsg:
+	if sysvals.testlog and sysvals.logmsg:
 		hf.write('<div id="testlog" style="display:none;">\n'+sysvals.logmsg+'</div>\n')
 	# add the dmesg log as a hidden div
-	if sysvals.addlogs and sysvals.dmesgfile:
+	if sysvals.dmesglog and sysvals.dmesgfile:
 		hf.write('<div id="dmesglog" style="display:none;">\n')
 		lf = open(sysvals.dmesgfile, 'r')
 		for line in lf:
@@ -3640,7 +3717,7 @@ def createHTML(testruns):
 		lf.close()
 		hf.write('</div>\n')
 	# add the ftrace log as a hidden div
-	if sysvals.addlogs and sysvals.ftracefile:
+	if sysvals.ftracelog and sysvals.ftracefile:
 		hf.write('<div id="ftracelog" style="display:none;">\n')
 		lf = open(sysvals.ftracefile, 'r')
 		for line in lf:
@@ -3701,6 +3778,7 @@ def addCSS(hf, sv, testcount=1, kerror=False, extra=''):
 	<style type=\'text/css\'>\n\
 		body {overflow-y:scroll;}\n\
 		.stamp {width:100%;text-align:center;background:gray;line-height:30px;color:white;font:25px Arial;}\n\
+		.stamp.sysinfo {font:10px Arial;}\n\
 		.callgraph {margin-top:30px;box-shadow:5px 5px 20px black;}\n\
 		.callgraph article * {padding-left:28px;}\n\
 		h1 {color:black;font:bold 30px Times;}\n\
@@ -3746,7 +3824,7 @@ def addCSS(hf, sv, testcount=1, kerror=False, extra=''):
 		.legend {position:relative; width:100%; height:40px; text-align:center;margin-bottom:20px}\n\
 		.legend .square {position:absolute;cursor:pointer;top:10px; width:0px;height:20px;border:1px solid;padding-left:20px;}\n\
 		button {height:40px;width:200px;margin-bottom:20px;margin-top:20px;font-size:24px;}\n\
-		.logbtn {position:relative;float:right;height:25px;width:50px;margin-top:3px;margin-bottom:0;font-size:10px;text-align:center;}\n\
+		.btnfmt {position:relative;float:right;height:25px;width:auto;margin-top:3px;margin-bottom:0;font-size:10px;text-align:center;}\n\
 		.devlist {position:'+devlistpos+';width:190px;}\n\
 		a:link {color:white;text-decoration:none;}\n\
 		a:visited {color:white;}\n\
@@ -4084,8 +4162,6 @@ def addScriptCode(hf, testruns):
 	'		win.document.write(title+"<pre>"+log.innerHTML+"</pre>");\n'\
 	'		win.document.close();\n'\
 	'	}\n'\
-	'	function onClickPhase(e) {\n'\
-	'	}\n'\
 	'	function onMouseDown(e) {\n'\
 	'		dragval[0] = e.clientX;\n'\
 	'		dragval[1] = document.getElementById("dmesgzoombox").scrollLeft;\n'\
@@ -4120,9 +4196,6 @@ def addScriptCode(hf, testruns):
 	'		document.getElementById("zoomin").onclick = zoomTimeline;\n'\
 	'		document.getElementById("zoomout").onclick = zoomTimeline;\n'\
 	'		document.getElementById("zoomdef").onclick = zoomTimeline;\n'\
-	'		var list = document.getElementsByClassName("square");\n'\
-	'		for (var i = 0; i < list.length; i++)\n'\
-	'			list[i].onclick = onClickPhase;\n'\
 	'		var list = document.getElementsByClassName("err");\n'\
 	'		for (var i = 0; i < list.length; i++)\n'\
 	'			list[i].onclick = errWindow;\n'\
@@ -4193,8 +4266,14 @@ def executeSuspend():
 		if sysvals.testcommand != '':
 			call(sysvals.testcommand+' 2>&1', shell=True);
 		else:
+			mode = sysvals.suspendmode
+			if sysvals.memmode and os.path.exists(sysvals.mempowerfile):
+				mode = 'mem'
+				pf = open(sysvals.mempowerfile, 'w')
+				pf.write(sysvals.memmode)
+				pf.close()
 			pf = open(sysvals.powerfile, 'w')
-			pf.write(sysvals.suspendmode)
+			pf.write(mode)
 			# execution will pause here
 			try:
 				pf.close()
@@ -4219,24 +4298,15 @@ def executeSuspend():
 			pm.stop()
 		sysvals.fsetVal('0', 'tracing_on')
 		print('CAPTURING TRACE')
-		writeDatafileHeader(sysvals.ftracefile, fwdata)
+		sysvals.writeDatafileHeader(sysvals.ftracefile, fwdata)
 		call('cat '+tp+'trace >> '+sysvals.ftracefile, shell=True)
 		sysvals.fsetVal('', 'trace')
 		devProps()
 	# grab a copy of the dmesg output
 	print('CAPTURING DMESG')
-	writeDatafileHeader(sysvals.dmesgfile, fwdata)
+	sysvals.writeDatafileHeader(sysvals.dmesgfile, fwdata)
 	sysvals.getdmesg()
 
-def writeDatafileHeader(filename, fwdata):
-	fp = open(filename, 'a')
-	fp.write(sysvals.teststamp+'\n')
-	if(sysvals.suspendmode == 'mem' or sysvals.suspendmode == 'command'):
-		for fw in fwdata:
-			if(fw):
-				fp.write('# fwsuspend %u fwresume %u\n' % (fw[0], fw[1]))
-	fp.close()
-
 # Function: setUSBDevicesAuto
 # Description:
 #	 Set the autosuspend control parameter of all USB devices to auto
@@ -4244,7 +4314,7 @@ def writeDatafileHeader(filename, fwdata):
 #	 to always-on since the kernel cant determine if the device can
 #	 properly autosuspend
 def setUSBDevicesAuto():
-	rootCheck(True)
+	sysvals.rootCheck(True)
 	for dirname, dirnames, filenames in os.walk('/sys/devices'):
 		if(re.match('.*/usb[0-9]*.*', dirname) and
 			'idVendor' in filenames and 'idProduct' in filenames):
@@ -4467,13 +4537,146 @@ def devProps(data=0):
 # Output:
 #	 A string list of the available modes
 def getModes():
-	modes = ''
+	modes = []
 	if(os.path.exists(sysvals.powerfile)):
 		fp = open(sysvals.powerfile, 'r')
 		modes = string.split(fp.read())
 		fp.close()
+	if(os.path.exists(sysvals.mempowerfile)):
+		deep = False
+		fp = open(sysvals.mempowerfile, 'r')
+		for m in string.split(fp.read()):
+			memmode = m.strip('[]')
+			if memmode == 'deep':
+				deep = True
+			else:
+				modes.append('mem-%s' % memmode)
+		fp.close()
+		if 'mem' in modes and not deep:
+			modes.remove('mem')
 	return modes
 
+# Function: dmidecode
+# Description:
+#	 Read the bios tables and pull out system info
+# Arguments:
+#	 mempath: /dev/mem or custom mem path
+#	 fatal: True to exit on error, False to return empty dict
+# Output:
+#	 A dict object with all available key/values
+def dmidecode(mempath, fatal=False):
+	out = dict()
+
+	# the list of values to retrieve, with hardcoded (type, idx)
+	info = {
+		'bios-vendor': (0, 4),
+		'bios-version': (0, 5),
+		'bios-release-date': (0, 8),
+		'system-manufacturer': (1, 4),
+		'system-product-name': (1, 5),
+		'system-version': (1, 6),
+		'system-serial-number': (1, 7),
+		'baseboard-manufacturer': (2, 4),
+		'baseboard-product-name': (2, 5),
+		'baseboard-version': (2, 6),
+		'baseboard-serial-number': (2, 7),
+		'chassis-manufacturer': (3, 4),
+		'chassis-type': (3, 5),
+		'chassis-version': (3, 6),
+		'chassis-serial-number': (3, 7),
+		'processor-manufacturer': (4, 7),
+		'processor-version': (4, 16),
+	}
+	if(not os.path.exists(mempath)):
+		if(fatal):
+			doError('file does not exist: %s' % mempath)
+		return out
+	if(not os.access(mempath, os.R_OK)):
+		if(fatal):
+			doError('file is not readable: %s' % mempath)
+		return out
+
+	# by default use legacy scan, but try to use EFI first
+	memaddr = 0xf0000
+	memsize = 0x10000
+	for ep in ['/sys/firmware/efi/systab', '/proc/efi/systab']:
+		if not os.path.exists(ep) or not os.access(ep, os.R_OK):
+			continue
+		fp = open(ep, 'r')
+		buf = fp.read()
+		fp.close()
+		i = buf.find('SMBIOS=')
+		if i >= 0:
+			try:
+				memaddr = int(buf[i+7:], 16)
+				memsize = 0x20
+			except:
+				continue
+
+	# read in the memory for scanning
+	fp = open(mempath, 'rb')
+	try:
+		fp.seek(memaddr)
+		buf = fp.read(memsize)
+	except:
+		if(fatal):
+			doError('DMI table is unreachable, sorry')
+		else:
+			return out
+	fp.close()
+
+	# search for either an SM table or DMI table
+	i = base = length = num = 0
+	while(i < memsize):
+		if buf[i:i+4] == '_SM_' and i < memsize - 16:
+			length = struct.unpack('H', buf[i+22:i+24])[0]
+			base, num = struct.unpack('IH', buf[i+24:i+30])
+			break
+		elif buf[i:i+5] == '_DMI_':
+			length = struct.unpack('H', buf[i+6:i+8])[0]
+			base, num = struct.unpack('IH', buf[i+8:i+14])
+			break
+		i += 16
+	if base == 0 and length == 0 and num == 0:
+		if(fatal):
+			doError('Neither SMBIOS nor DMI were found')
+		else:
+			return out
+
+	# read in the SM or DMI table
+	fp = open(mempath, 'rb')
+	try:
+		fp.seek(base)
+		buf = fp.read(length)
+	except:
+		if(fatal):
+			doError('DMI table is unreachable, sorry')
+		else:
+			return out
+	fp.close()
+
+	# scan the table for the values we want
+	count = i = 0
+	while(count < num and i <= len(buf) - 4):
+		type, size, handle = struct.unpack('BBH', buf[i:i+4])
+		n = i + size
+		while n < len(buf) - 1:
+			if 0 == struct.unpack('H', buf[n:n+2])[0]:
+				break
+			n += 1
+		data = buf[i+size:n+2].split('\0')
+		for name in info:
+			itype, idxadr = info[name]
+			if itype == type:
+				idx = struct.unpack('B', buf[i+idxadr])[0]
+				if idx > 0 and idx < len(data) - 1:
+					s = data[idx-1].strip()
+					if s and s.lower() != 'to be filled by o.e.m.':
+						out[name] = data[idx-1]
+		i = n + 2
+		count += 1
+	return out
+
 # Function: getFPDT
 # Description:
 #	 Read the acpi bios tables and pull out FPDT, the firmware data
@@ -4487,7 +4690,7 @@ def getFPDT(output):
 	prectype[0] = 'Basic S3 Resume Performance Record'
 	prectype[1] = 'Basic S3 Suspend Performance Record'
 
-	rootCheck(True)
+	sysvals.rootCheck(True)
 	if(not os.path.exists(sysvals.fpdtpath)):
 		if(output):
 			doError('file does not exist: %s' % sysvals.fpdtpath)
@@ -4617,7 +4820,7 @@ def statusCheck(probecheck=False):
 
 	# check we have root access
 	res = sysvals.colorText('NO (No features of this tool will work!)')
-	if(rootCheck(False)):
+	if(sysvals.rootCheck(False)):
 		res = 'YES'
 	print('    have root access: %s' % res)
 	if(res != 'YES'):
@@ -4716,16 +4919,6 @@ def doError(msg, help=False):
 	print('ERROR: %s\n') % msg
 	sys.exit()
 
-# Function: rootCheck
-# Description:
-#	 quick check to see if we have root access
-def rootCheck(fatal):
-	if(os.access(sysvals.powerfile, os.W_OK)):
-		return True
-	if fatal:
-		doError('This command requires sysfs mount and root access')
-	return False
-
 # Function: getArgInt
 # Description:
 #	 pull out an integer argument from the command line with checks
@@ -4779,6 +4972,7 @@ def processData():
 		if(sysvals.ftracefile and (sysvals.usecallgraph or sysvals.usetraceevents)):
 			appendIncompleteTraceLog(testruns)
 	createHTML(testruns)
+	return testruns
 
 # Function: rerunTest
 # Description:
@@ -4790,17 +4984,20 @@ def rerunTest():
 		doError('recreating this html output requires a dmesg file')
 	sysvals.setOutputFile()
 	vprint('Output file: %s' % sysvals.htmlfile)
-	if(os.path.exists(sysvals.htmlfile) and not os.access(sysvals.htmlfile, os.W_OK)):
-		doError('missing permission to write to %s' % sysvals.htmlfile)
-	processData()
+	if os.path.exists(sysvals.htmlfile):
+		if not os.path.isfile(sysvals.htmlfile):
+			doError('a directory already exists with this name: %s' % sysvals.htmlfile)
+		elif not os.access(sysvals.htmlfile, os.W_OK):
+			doError('missing permission to write to %s' % sysvals.htmlfile)
+	return processData()
 
 # Function: runTest
 # Description:
 #	 execute a suspend/resume, gather the logs, and generate the output
-def runTest(subdir, testpath=''):
+def runTest():
 	# prepare for the test
 	sysvals.initFtrace()
-	sysvals.initTestOutput(subdir, testpath)
+	sysvals.initTestOutput('suspend')
 	vprint('Output files:\n\t%s\n\t%s\n\t%s' % \
 		(sysvals.dmesgfile, sysvals.ftracefile, sysvals.htmlfile))
 
@@ -4897,7 +5094,7 @@ def configFromFile(file):
 			if(opt.lower() == 'verbose'):
 				sysvals.verbose = checkArgBool(value)
 			elif(opt.lower() == 'addlogs'):
-				sysvals.addlogs = checkArgBool(value)
+				sysvals.dmesglog = sysvals.ftracelog = checkArgBool(value)
 			elif(opt.lower() == 'dev'):
 				sysvals.usedevsrc = checkArgBool(value)
 			elif(opt.lower() == 'proc'):
@@ -4947,7 +5144,7 @@ def configFromFile(file):
 			elif(opt.lower() == 'mincg'):
 				sysvals.mincglen = getArgFloat('-mincg', value, 0.0, 10000.0, False)
 			elif(opt.lower() == 'output-dir'):
-				sysvals.setOutputFolder(value)
+				sysvals.testdir = sysvals.setOutputFolder(value)
 
 	if sysvals.suspendmode == 'command' and not sysvals.testcommand:
 		doError('No command supplied for mode "command"')
@@ -5030,8 +5227,6 @@ def configFromFile(file):
 # Description:
 #	 print out the help text
 def printHelp():
-	modes = getModes()
-
 	print('')
 	print('%s v%s' % (sysvals.title, sysvals.version))
 	print('Usage: sudo sleepgraph <options> <commands>')
@@ -5048,7 +5243,7 @@ def printHelp():
 	print('  If no specific command is given, the default behavior is to initiate')
 	print('  a suspend/resume and capture the dmesg/ftrace output as an html timeline.')
 	print('')
-	print('  Generates output files in subdirectory: suspend-mmddyy-HHMMSS')
+	print('  Generates output files in subdirectory: suspend-yymmdd-HHMMSS')
 	print('   HTML output:                    <hostname>_<mode>.html')
 	print('   raw dmesg output:               <hostname>_<mode>_dmesg.txt')
 	print('   raw ftrace output:              <hostname>_<mode>_ftrace.txt')
@@ -5058,8 +5253,9 @@ def printHelp():
 	print('   -v           Print the current tool version')
 	print('   -config fn   Pull arguments and config options from file fn')
 	print('   -verbose     Print extra information during execution and analysis')
-	print('   -m mode      Mode to initiate for suspend %s (default: %s)') % (modes, sysvals.suspendmode)
-	print('   -o subdir    Override the output subdirectory')
+	print('   -m mode      Mode to initiate for suspend (default: %s)') % (sysvals.suspendmode)
+	print('   -o name      Overrides the output subdirectory name when running a new test')
+	print('                default: suspend-{date}-{time}')
 	print('   -rtcwake t   Wakeup t seconds after suspend, set t to "off" to disable (default: 15)')
 	print('   -addlogs     Add the dmesg and ftrace logs to the html output')
 	print('   -srgap       Add a visible gap in the timeline between sus/res (default: disabled)')
@@ -5084,17 +5280,20 @@ def printHelp():
 	print('   -cgphase P   Only show callgraph data for phase P (e.g. suspend_late)')
 	print('   -cgtest N    Only show callgraph data for test N (e.g. 0 or 1 in an x2 run)')
 	print('   -timeprec N  Number of significant digits in timestamps (0:S, [3:ms], 6:us)')
-	print('  [commands]')
-	print('   -ftrace ftracefile  Create HTML output using ftrace input (used with -dmesg)')
-	print('   -dmesg dmesgfile    Create HTML output using dmesg (used with -ftrace)')
-	print('   -summary directory  Create a summary of all test in this dir')
+	print('')
+	print('Other commands:')
 	print('   -modes       List available suspend modes')
 	print('   -status      Test to see if the system is enabled to run this tool')
 	print('   -fpdt        Print out the contents of the ACPI Firmware Performance Data Table')
+	print('   -sysinfo     Print out system info extracted from BIOS')
 	print('   -usbtopo     Print out the current USB topology with power info')
 	print('   -usbauto     Enable autosuspend for all connected USB devices')
 	print('   -flist       Print the list of functions currently being captured in ftrace')
 	print('   -flistall    Print all functions capable of being captured in ftrace')
+	print('   -summary directory  Create a summary of all test in this dir')
+	print('  [redo]')
+	print('   -ftrace ftracefile  Create HTML output using ftrace input (used with -dmesg)')
+	print('   -dmesg dmesgfile    Create HTML output using dmesg (used with -ftrace)')
 	print('')
 	return True
 
@@ -5102,9 +5301,9 @@ def printHelp():
 # exec start (skipped if script is loaded as library)
 if __name__ == '__main__':
 	cmd = ''
-	cmdarg = ''
+	outdir = ''
 	multitest = {'run': False, 'count': 0, 'delay': 0}
-	simplecmds = ['-modes', '-fpdt', '-flist', '-flistall', '-usbtopo', '-usbauto', '-status']
+	simplecmds = ['-sysinfo', '-modes', '-fpdt', '-flist', '-flistall', '-usbtopo', '-usbauto', '-status']
 	# loop through the command line arguments
 	args = iter(sys.argv[1:])
 	for arg in args:
@@ -5135,7 +5334,7 @@ if __name__ == '__main__':
 		elif(arg == '-f'):
 			sysvals.usecallgraph = True
 		elif(arg == '-addlogs'):
-			sysvals.addlogs = True
+			sysvals.dmesglog = sysvals.ftracelog = True
 		elif(arg == '-verbose'):
 			sysvals.verbose = True
 		elif(arg == '-proc'):
@@ -5195,7 +5394,7 @@ if __name__ == '__main__':
 				val = args.next()
 			except:
 				doError('No subdirectory name supplied', True)
-			sysvals.setOutputFolder(val)
+			outdir = sysvals.setOutputFolder(val)
 		elif(arg == '-config'):
 			try:
 				val = args.next()
@@ -5236,7 +5435,7 @@ if __name__ == '__main__':
 			except:
 				doError('No directory supplied', True)
 			cmd = 'summary'
-			cmdarg = val
+			outdir = val
 			sysvals.notestrun = True
 			if(os.path.isdir(val) == False):
 				doError('%s is not accesible' % val)
@@ -5260,11 +5459,14 @@ if __name__ == '__main__':
 		sysvals.mincglen = sysvals.mindevlen
 
 	# just run a utility command and exit
+	sysvals.cpuInfo()
 	if(cmd != ''):
 		if(cmd == 'status'):
 			statusCheck(True)
 		elif(cmd == 'fpdt'):
 			getFPDT(True)
+		elif(cmd == 'sysinfo'):
+			sysvals.printSystemInfo()
 		elif(cmd == 'usbtopo'):
 			detectUSB()
 		elif(cmd == 'modes'):
@@ -5276,7 +5478,7 @@ if __name__ == '__main__':
 		elif(cmd == 'usbauto'):
 			setUSBDevicesAuto()
 		elif(cmd == 'summary'):
-			runSummary(cmdarg, True)
+			runSummary(outdir, True)
 		sys.exit()
 
 	# if instructed, re-analyze existing data files
@@ -5289,21 +5491,43 @@ if __name__ == '__main__':
 		print('Check FAILED, aborting the test run!')
 		sys.exit()
 
+	# extract mem modes and convert
+	mode = sysvals.suspendmode
+	if 'mem' == mode[:3]:
+		if '-' in mode:
+			memmode = mode.split('-')[-1]
+		else:
+			memmode = 'deep'
+		if memmode == 'shallow':
+			mode = 'standby'
+		elif memmode ==  's2idle':
+			mode = 'freeze'
+		else:
+			mode = 'mem'
+		sysvals.memmode = memmode
+		sysvals.suspendmode = mode
+
+	sysvals.systemInfo(dmidecode(sysvals.mempath))
+
 	if multitest['run']:
 		# run multiple tests in a separate subdirectory
-		s = 'x%d' % multitest['count']
-		if not sysvals.outdir:
-			sysvals.outdir = datetime.now().strftime('suspend-'+s+'-%m%d%y-%H%M%S')
-		if not os.path.isdir(sysvals.outdir):
-			os.mkdir(sysvals.outdir)
+		if not outdir:
+			s = 'suspend-x%d' % multitest['count']
+			outdir = datetime.now().strftime(s+'-%y%m%d-%H%M%S')
+		if not os.path.isdir(outdir):
+			os.mkdir(outdir)
 		for i in range(multitest['count']):
 			if(i != 0):
 				print('Waiting %d seconds...' % (multitest['delay']))
 				time.sleep(multitest['delay'])
 			print('TEST (%d/%d) START' % (i+1, multitest['count']))
-			runTest(sysvals.outdir)
+			fmt = 'suspend-%y%m%d-%H%M%S'
+			sysvals.testdir = os.path.join(outdir, datetime.now().strftime(fmt))
+			runTest()
 			print('TEST (%d/%d) COMPLETE' % (i+1, multitest['count']))
-		runSummary(sysvals.outdir, False)
+		runSummary(outdir, False)
 	else:
+		if outdir:
+			sysvals.testdir = outdir
 		# run the test in the current directory
-		runTest('.', sysvals.outdir)
+		runTest()
diff --git a/tools/power/pm-graph/bootgraph.8 b/tools/power/pm-graph/bootgraph.8
index 55272a67b0e7..dbdafcf546df 100644
--- a/tools/power/pm-graph/bootgraph.8
+++ b/tools/power/pm-graph/bootgraph.8
@@ -8,14 +8,23 @@ bootgraph \- Kernel boot timing analysis
 .RB [ COMMAND ]
 .SH DESCRIPTION
 \fBbootgraph \fP reads the dmesg log from kernel boot and
-creates an html representation of the initcall timeline up to the start
-of the init process.
+creates an html representation of the initcall timeline. It graphs
+every module init call found, through both kernel and user modes. The
+timeline is split into two phases: kernel mode & user mode. kernel mode
+represents a single process run on a single cpu with serial init calls.
+Once user mode begins, the init process is called, and the init calls
+start working in parallel.
 .PP
 If no specific command is given, the tool reads the current dmesg log and
-outputs bootgraph.html.
+outputs a new timeline.
 .PP
 The tool can also augment the timeline with ftrace data on custom target
 functions as well as full trace callgraphs.
+.PP
+Generates output files in subdirectory: boot-yymmdd-HHMMSS
+   html timeline   :     <hostname>_boot.html
+   raw dmesg file  :     <hostname>_boot_dmesg.txt
+   raw ftrace file :     <hostname>_boot_ftrace.txt
 .SH OPTIONS
 .TP
 \fB-h\fR
@@ -28,15 +37,18 @@ Print the current tool version
 Add the dmesg log to the html output. It will be viewable by
 clicking a button in the timeline.
 .TP
-\fB-o \fIfile\fR
-Override the HTML output filename (default: bootgraph.html)
-.SS "Ftrace Debug"
+\fB-o \fIname\fR
+Overrides the output subdirectory name when running a new test.
+Use {date}, {time}, {hostname} for current values.
+.sp
+e.g. boot-{hostname}-{date}-{time}
+.SS "advanced"
 .TP
 \fB-f\fR
 Use ftrace to add function detail (default: disabled)
 .TP
 \fB-callgraph\fR
-Use ftrace to create initcall callgraphs (default: disabled). If -filter
+Use ftrace to create initcall callgraphs (default: disabled). If -func
 is not used there will be one callgraph per initcall. This can produce
 very large outputs, i.e. 10MB - 100MB.
 .TP
@@ -50,16 +62,19 @@ This reduces the html file size as there can be many tiny callgraphs
 which are barely visible in the timeline.
 The value is a float: e.g. 0.001 represents 1 us.
 .TP
+\fB-cgfilter \fI"func1,func2,..."\fR
+Reduce callgraph output in the timeline by limiting it to a list of calls. The
+argument can be a single function name or a comma delimited list.
+(default: none)
+.TP
 \fB-timeprec \fIn\fR
 Number of significant digits in timestamps (0:S, 3:ms, [6:us])
 .TP
 \fB-expandcg\fR
 pre-expand the callgraph data in the html output (default: disabled)
 .TP
-\fB-filter \fI"func1,func2,..."\fR
+\fB-func \fI"func1,func2,..."\fR
 Instead of tracing each initcall, trace a custom list of functions (default: do_one_initcall)
-
-.SH COMMANDS
 .TP
 \fB-reboot\fR
 Reboot the machine and generate a new timeline automatically. Works in 4 steps.
@@ -73,16 +88,23 @@ Show the requirements to generate a new timeline manually. Requires 3 steps.
   1. append the string to the kernel command line via your native boot manager.
   2. reboot the system
   3. after startup, re-run the tool with the same arguments and no command
+
+.SH COMMANDS
+.SS "rebuild"
 .TP
 \fB-dmesg \fIfile\fR
 Create HTML output from an existing dmesg file.
 .TP
 \fB-ftrace \fIfile\fR
 Create HTML output from an existing ftrace file (used with -dmesg).
+.SS "other"
 .TP
 \fB-flistall\fR
 Print all ftrace functions capable of being captured. These are all the
-possible values you can add to trace via the -filter argument.
+possible values you can add to trace via the -func argument.
+.TP
+\fB-sysinfo\fR
+Print out system info extracted from BIOS. Reads /dev/mem directly instead of going through dmidecode.
 
 .SH EXAMPLES
 Create a timeline using the current dmesg log.
@@ -93,13 +115,13 @@ Create a timeline using the current dmesg and ftrace log.
 .IP
 \f(CW$ bootgraph -callgraph\fR
 .PP
-Create a timeline using the current dmesg, add the log to the html and change the name.
+Create a timeline using the current dmesg, add the log to the html and change the folder.
 .IP
-\f(CW$ bootgraph -addlogs -o myboot.html\fR
+\f(CW$ bootgraph -addlogs -o "myboot-{date}-{time}"\fR
 .PP
 Capture a new boot timeline by automatically rebooting the machine.
 .IP
-\f(CW$ sudo bootgraph -reboot -addlogs -o latestboot.html\fR
+\f(CW$ sudo bootgraph -reboot -addlogs -o "latest-{hostname)"\fR
 .PP
 Capture a new boot timeline with function trace data.
 .IP
@@ -111,7 +133,7 @@ Capture a new boot timeline with trace & callgraph data. Skip callgraphs smaller
 .PP
 Capture a new boot timeline with callgraph data over custom functions.
 .IP
-\f(CW$ sudo bootgraph -reboot -callgraph -filter "acpi_ps_parse_aml,msleep"\fR
+\f(CW$ sudo bootgraph -reboot -callgraph -func "acpi_ps_parse_aml,msleep"\fR
 .PP
 Capture a brand new boot timeline with manual reboot.
 .IP
@@ -123,6 +145,15 @@ Capture a brand new boot timeline with manual reboot.
 .IP
 \f(CW$ sudo bootgraph -callgraph # re-run the tool after restart\fR
 .PP
+.SS "rebuild timeline from logs"
+.PP
+Rebuild the html from a previous run's logs, using the same options.
+.IP
+\f(CW$ bootgraph -dmesg dmesg.txt -ftrace ftrace.txt -callgraph\fR
+.PP
+Rebuild the html with different options.
+.IP
+\f(CW$ bootgraph -dmesg dmesg.txt -ftrace ftrace.txt -addlogs\fR
 
 .SH "SEE ALSO"
 dmesg(1), update-grub(8), crontab(1), reboot(8)
diff --git a/tools/power/pm-graph/sleepgraph.8 b/tools/power/pm-graph/sleepgraph.8
index 610e72ebbc06..fbe7bd3eae8e 100644
--- a/tools/power/pm-graph/sleepgraph.8
+++ b/tools/power/pm-graph/sleepgraph.8
@@ -39,8 +39,9 @@ Pull arguments and config options from a file.
 \fB-m \fImode\fR
 Mode to initiate for suspend e.g. standby, freeze, mem (default: mem).
 .TP
-\fB-o \fIsubdir\fR
-Override the output subdirectory. Use {date}, {time}, {hostname} for current values.
+\fB-o \fIname\fR
+Overrides the output subdirectory name when running a new test.
+Use {date}, {time}, {hostname} for current values.
 .sp
 e.g. suspend-{hostname}-{date}-{time}
 .TP
@@ -52,7 +53,7 @@ disable rtcwake and require a user keypress to resume.
 Add the dmesg and ftrace logs to the html output. They will be viewable by
 clicking buttons in the timeline.
 
-.SS "Advanced"
+.SS "advanced"
 .TP
 \fB-cmd \fIstr\fR
 Run the timeline over a custom suspend command, e.g. pm-suspend. By default
@@ -91,7 +92,7 @@ Include \fIt\fR ms delay after last resume (default: 0 ms).
 Execute \fIn\fR consecutive tests at \fId\fR seconds intervals. The outputs will
 be created in a new subdirectory with a summary page: suspend-xN-{date}-{time}.
 
-.SS "Ftrace Debug"
+.SS "ftrace debug"
 .TP
 \fB-f\fR
 Use ftrace to create device callgraphs (default: disabled). This can produce
@@ -124,12 +125,6 @@ Number of significant digits in timestamps (0:S, [3:ms], 6:us).
 
 .SH COMMANDS
 .TP
-\fB-ftrace \fIfile\fR
-Create HTML output from an existing ftrace file.
-.TP
-\fB-dmesg \fIfile\fR
-Create HTML output from an existing dmesg file.
-.TP
 \fB-summary \fIindir\fR
 Create a summary page of all tests in \fIindir\fR. Creates summary.html
 in the current folder. The output page is a table of tests with
@@ -146,6 +141,9 @@ with any options you intend to use to see if they will work.
 \fB-fpdt\fR
 Print out the contents of the ACPI Firmware Performance Data Table.
 .TP
+\fB-sysinfo\fR
+Print out system info extracted from BIOS. Reads /dev/mem directly instead of going through dmidecode.
+.TP
 \fB-usbtopo\fR
 Print out the current USB topology with power info.
 .TP
@@ -162,9 +160,16 @@ with -fadd they will also be checked.
 \fB-flistall\fR
 Print all ftrace functions capable of being captured. These are all the
 possible values you can add to trace via the -fadd argument.
+.SS "rebuild"
+.TP
+\fB-ftrace \fIfile\fR
+Create HTML output from an existing ftrace file.
+.TP
+\fB-dmesg \fIfile\fR
+Create HTML output from an existing dmesg file.
 
 .SH EXAMPLES
-.SS "Simple Commands"
+.SS "simple commands"
 Check which suspend modes are currently supported.
 .IP
 \f(CW$ sleepgraph -modes\fR
@@ -185,12 +190,8 @@ Generate a summary of all timelines in a particular folder.
 .IP
 \f(CW$ sleepgraph -summary ~/workspace/myresults/\fR
 .PP
-Re-generate the html output from a previous run's dmesg and ftrace log.
-.IP
-\f(CW$ sleepgraph -dmesg myhost_mem_dmesg.txt -ftrace myhost_mem_ftrace.txt\fR
-.PP
 
-.SS "Capturing Simple Timelines"
+.SS "capturing basic timelines"
 Execute a mem suspend with a 15 second wakeup. Include the logs in the html.
 .IP
 \f(CW$ sudo sleepgraph -rtcwake 15 -addlogs\fR
@@ -204,7 +205,7 @@ Execute a freeze with no wakeup (require keypress). Change output folder name.
 \f(CW$ sudo sleepgraph -m freeze -rtcwake off -o "freeze-{hostname}-{date}-{time}"\fR
 .PP
 
-.SS "Capturing Advanced Timelines"
+.SS "capturing advanced timelines"
 Execute a suspend & include dev mode source calls, limit callbacks to 5ms or larger.
 .IP
 \f(CW$ sudo sleepgraph -m mem -rtcwake 15 -dev -mindev 5\fR
@@ -222,8 +223,7 @@ Execute a suspend using a custom command.
 \f(CW$ sudo sleepgraph -cmd "echo mem > /sys/power/state" -rtcwake 15\fR
 .PP
 
-
-.SS "Capturing Timelines with Callgraph Data"
+.SS "adding callgraph data"
 Add device callgraphs. Limit the trace depth and only show callgraphs 10ms or larger.
 .IP
 \f(CW$ sudo sleepgraph -m mem -rtcwake 15 -f -maxdepth 5 -mincg 10\fR
@@ -235,6 +235,16 @@ Capture a full callgraph across all suspend, then filter the html by a single ph
 \f(CW$ sleepgraph -dmesg host_mem_dmesg.txt -ftrace host_mem_ftrace.txt -f -cgphase resume
 .PP
 
+.SS "rebuild timeline from logs"
+.PP
+Rebuild the html from a previous run's logs, using the same options.
+.IP
+\f(CW$ sleepgraph -dmesg dmesg.txt -ftrace ftrace.txt -callgraph\fR
+.PP
+Rebuild the html with different options.
+.IP
+\f(CW$ sleepgraph -dmesg dmesg.txt -ftrace ftrace.txt -addlogs -srgap\fR
+
 .SH "SEE ALSO"
 dmesg(1)
 .PP
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index 1e8b6116ba3c..9dc8f078a83c 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -1,7 +1,7 @@
 ifneq ($(O),)
 ifeq ($(origin O), command line)
-	dummy := $(if $(shell test -d $(O) || echo $(O)),$(error O=$(O) does not exist),)
-	ABSOLUTE_O := $(shell cd $(O) ; pwd)
+	ABSOLUTE_O := $(realpath $(O))
+	dummy := $(if $(ABSOLUTE_O),,$(error O=$(O) does not exist))
 	OUTPUT := $(ABSOLUTE_O)/$(if $(subdir),$(subdir)/)
 	COMMAND_O := O=$(ABSOLUTE_O)
 ifeq ($(objtree),)
@@ -12,7 +12,7 @@ endif
 
 # check that the output directory actually exists
 ifneq ($(OUTPUT),)
-OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd)
+OUTDIR := $(realpath $(OUTPUT))
 $(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist))
 endif
 
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 4c2fa98ef39d..bef419d4266d 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -1527,9 +1527,6 @@ static void nfit_test1_setup(struct nfit_test *t)
 	set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en);
 	set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
 	set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en);
-	set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en);
-	set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
-	set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
 }
 
 static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa,
@@ -1546,8 +1543,8 @@ static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa,
 	else {
 		memcpy(iobuf, mmio->addr.base + dpa, len);
 
-		/* give us some some coverage of the mmio_flush_range() API */
-		mmio_flush_range(mmio->addr.base + dpa, len);
+		/* give us some some coverage of the arch_invalidate_pmem() API */
+		arch_invalidate_pmem(mmio->addr.base + dpa, len);
 	}
 	nd_region_release_lane(nd_region, lane);
 
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 26ce4f7168be..ff805643b5f7 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -52,6 +52,10 @@ override LDFLAGS =
 override MAKEFLAGS =
 endif
 
+ifneq ($(KBUILD_SRC),)
+override LDFLAGS =
+endif
+
 BUILD := $(O)
 ifndef BUILD
   BUILD := $(KBUILD_OUTPUT)
@@ -62,32 +66,32 @@ endif
 
 export BUILD
 all:
-	for TARGET in $(TARGETS); do		\
+	@for TARGET in $(TARGETS); do		\
 		BUILD_TARGET=$$BUILD/$$TARGET;	\
 		mkdir $$BUILD_TARGET  -p;	\
 		make OUTPUT=$$BUILD_TARGET -C $$TARGET;\
 	done;
 
 run_tests: all
-	for TARGET in $(TARGETS); do \
+	@for TARGET in $(TARGETS); do \
 		BUILD_TARGET=$$BUILD/$$TARGET;	\
 		make OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\
 	done;
 
 hotplug:
-	for TARGET in $(TARGETS_HOTPLUG); do \
+	@for TARGET in $(TARGETS_HOTPLUG); do \
 		BUILD_TARGET=$$BUILD/$$TARGET;	\
 		make OUTPUT=$$BUILD_TARGET -C $$TARGET;\
 	done;
 
 run_hotplug: hotplug
-	for TARGET in $(TARGETS_HOTPLUG); do \
+	@for TARGET in $(TARGETS_HOTPLUG); do \
 		BUILD_TARGET=$$BUILD/$$TARGET;	\
 		make OUTPUT=$$BUILD_TARGET -C $$TARGET run_full_test;\
 	done;
 
 clean_hotplug:
-	for TARGET in $(TARGETS_HOTPLUG); do \
+	@for TARGET in $(TARGETS_HOTPLUG); do \
 		BUILD_TARGET=$$BUILD/$$TARGET;	\
 		make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
 	done;
@@ -103,7 +107,7 @@ install:
 ifdef INSTALL_PATH
 	@# Ask all targets to install their files
 	mkdir -p $(INSTALL_PATH)
-	for TARGET in $(TARGETS); do \
+	@for TARGET in $(TARGETS); do \
 		BUILD_TARGET=$$BUILD/$$TARGET;	\
 		make OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \
 	done;
@@ -128,7 +132,7 @@ else
 endif
 
 clean:
-	for TARGET in $(TARGETS); do \
+	@for TARGET in $(TARGETS); do \
 		BUILD_TARGET=$$BUILD/$$TARGET;	\
 		make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
 	done;
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 153c3a181a4c..f4b23d697448 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -15,9 +15,9 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
 	test_align
 
 TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
-	test_pkt_md_access.o
+	test_pkt_md_access.o test_xdp_redirect.o sockmap_parse_prog.o sockmap_verdict_prog.o
 
-TEST_PROGS := test_kmod.sh
+TEST_PROGS := test_kmod.sh test_xdp_redirect.sh
 
 include ../lib.mk
 
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index d50ac342dc92..36fb9161b34a 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -38,6 +38,8 @@ static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) =
 	(void *) BPF_FUNC_clone_redirect;
 static int (*bpf_redirect)(int ifindex, int flags) =
 	(void *) BPF_FUNC_redirect;
+static int (*bpf_redirect_map)(void *map, int key, int flags) =
+	(void *) BPF_FUNC_redirect_map;
 static int (*bpf_perf_event_output)(void *ctx, void *map,
 				    unsigned long long flags, void *data,
 				    int size) =
@@ -63,6 +65,12 @@ static int (*bpf_xdp_adjust_head)(void *ctx, int offset) =
 static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval,
 			     int optlen) =
 	(void *) BPF_FUNC_setsockopt;
+static int (*bpf_sk_redirect_map)(void *map, int key, int flags) =
+	(void *) BPF_FUNC_sk_redirect_map;
+static int (*bpf_sock_map_update)(void *map, void *key, void *value,
+				  unsigned long long flags) =
+	(void *) BPF_FUNC_sock_map_update;
+
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
@@ -85,6 +93,7 @@ struct bpf_map_def {
 	unsigned int max_entries;
 	unsigned int map_flags;
 	unsigned int inner_map_idx;
+	unsigned int numa_node;
 };
 
 static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) =
diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h
index 20ecbaa0d85d..6c53a8906eff 100644
--- a/tools/testing/selftests/bpf/bpf_util.h
+++ b/tools/testing/selftests/bpf/bpf_util.h
@@ -12,6 +12,7 @@ static inline unsigned int bpf_num_possible_cpus(void)
 	unsigned int start, end, possible_cpus = 0;
 	char buff[128];
 	FILE *fp;
+	int n;
 
 	fp = fopen(fcpu, "r");
 	if (!fp) {
@@ -20,17 +21,17 @@ static inline unsigned int bpf_num_possible_cpus(void)
 	}
 
 	while (fgets(buff, sizeof(buff), fp)) {
-		if (sscanf(buff, "%u-%u", &start, &end) == 2) {
-			possible_cpus = start == 0 ? end + 1 : 0;
-			break;
+		n = sscanf(buff, "%u-%u", &start, &end);
+		if (n == 0) {
+			printf("Failed to retrieve # possible CPUs!\n");
+			exit(1);
+		} else if (n == 1) {
+			end = start;
 		}
+		possible_cpus = start == 0 ? end + 1 : 0;
+		break;
 	}
-
 	fclose(fp);
-	if (!possible_cpus) {
-		printf("Failed to retrieve # possible CPUs!\n");
-		exit(1);
-	}
 
 	return possible_cpus;
 }
diff --git a/tools/testing/selftests/bpf/sockmap_parse_prog.c b/tools/testing/selftests/bpf/sockmap_parse_prog.c
new file mode 100644
index 000000000000..fae3b96c3aa4
--- /dev/null
+++ b/tools/testing/selftests/bpf/sockmap_parse_prog.c
@@ -0,0 +1,38 @@
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#include "bpf_util.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+
+#define bpf_printk(fmt, ...)					\
+({								\
+	       char ____fmt[] = fmt;				\
+	       bpf_trace_printk(____fmt, sizeof(____fmt),	\
+				##__VA_ARGS__);			\
+})
+
+SEC("sk_skb1")
+int bpf_prog1(struct __sk_buff *skb)
+{
+	void *data_end = (void *)(long) skb->data_end;
+	void *data = (void *)(long) skb->data;
+	__u32 lport = skb->local_port;
+	__u32 rport = skb->remote_port;
+	__u8 *d = data;
+
+	if (data + 10 > data_end)
+		return skb->len;
+
+	/* This write/read is a bit pointless but tests the verifier and
+	 * strparser handler for read/write pkt data and access into sk
+	 * fields.
+	 */
+	d[7] = 1;
+
+	bpf_printk("parse: data[0] = (%u): local_port %i remote %i\n",
+		   d[0], lport, bpf_ntohl(rport));
+	return skb->len;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/sockmap_verdict_prog.c
new file mode 100644
index 000000000000..9b99bd10807d
--- /dev/null
+++ b/tools/testing/selftests/bpf/sockmap_verdict_prog.c
@@ -0,0 +1,68 @@
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#include "bpf_util.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+
+#define bpf_printk(fmt, ...)					\
+({								\
+	       char ____fmt[] = fmt;				\
+	       bpf_trace_printk(____fmt, sizeof(____fmt),	\
+				##__VA_ARGS__);			\
+})
+
+struct bpf_map_def SEC("maps") sock_map_rx = {
+	.type = BPF_MAP_TYPE_SOCKMAP,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_map_tx = {
+	.type = BPF_MAP_TYPE_SOCKMAP,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_map_break = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 20,
+};
+
+SEC("sk_skb2")
+int bpf_prog2(struct __sk_buff *skb)
+{
+	void *data_end = (void *)(long) skb->data_end;
+	void *data = (void *)(long) skb->data;
+	__u32 lport = skb->local_port;
+	__u32 rport = skb->remote_port;
+	__u8 *d = data;
+	__u8 sk, map;
+
+	if (data + 8 > data_end)
+		return SK_DROP;
+
+	map = d[0];
+	sk = d[1];
+
+	d[0] = 0xd;
+	d[1] = 0xe;
+	d[2] = 0xa;
+	d[3] = 0xd;
+	d[4] = 0xb;
+	d[5] = 0xe;
+	d[6] = 0xe;
+	d[7] = 0xf;
+
+	bpf_printk("verdict: data[0] = redir(%u:%u)\n", map, sk);
+
+	if (!map)
+		return bpf_sk_redirect_map(&sock_map_rx, sk, 0);
+	return bpf_sk_redirect_map(&sock_map_tx, sk, 0);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c
index 29793694cbc7..8591c89c0828 100644
--- a/tools/testing/selftests/bpf/test_align.c
+++ b/tools/testing/selftests/bpf/test_align.c
@@ -27,6 +27,11 @@
 #define MAX_INSNS	512
 #define MAX_MATCHES	16
 
+struct bpf_reg_match {
+	unsigned int line;
+	const char *match;
+};
+
 struct bpf_align_test {
 	const char *descr;
 	struct bpf_insn	insns[MAX_INSNS];
@@ -36,10 +41,14 @@ struct bpf_align_test {
 		REJECT
 	} result;
 	enum bpf_prog_type prog_type;
-	const char *matches[MAX_MATCHES];
+	/* Matches must be in order of increasing line */
+	struct bpf_reg_match matches[MAX_MATCHES];
 };
 
 static struct bpf_align_test tests[] = {
+	/* Four tests of known constants.  These aren't staggeringly
+	 * interesting since we track exact values now.
+	 */
 	{
 		.descr = "mov",
 		.insns = {
@@ -53,11 +62,13 @@ static struct bpf_align_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.matches = {
-			"1: R1=ctx R3=imm2,min_value=2,max_value=2,min_align=2 R10=fp",
-			"2: R1=ctx R3=imm4,min_value=4,max_value=4,min_align=4 R10=fp",
-			"3: R1=ctx R3=imm8,min_value=8,max_value=8,min_align=8 R10=fp",
-			"4: R1=ctx R3=imm16,min_value=16,max_value=16,min_align=16 R10=fp",
-			"5: R1=ctx R3=imm32,min_value=32,max_value=32,min_align=32 R10=fp",
+			{1, "R1=ctx(id=0,off=0,imm=0)"},
+			{1, "R10=fp0"},
+			{1, "R3=inv2"},
+			{2, "R3=inv4"},
+			{3, "R3=inv8"},
+			{4, "R3=inv16"},
+			{5, "R3=inv32"},
 		},
 	},
 	{
@@ -79,17 +90,19 @@ static struct bpf_align_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.matches = {
-			"1: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R10=fp",
-			"2: R1=ctx R3=imm2,min_value=2,max_value=2,min_align=2 R10=fp",
-			"3: R1=ctx R3=imm4,min_value=4,max_value=4,min_align=4 R10=fp",
-			"4: R1=ctx R3=imm8,min_value=8,max_value=8,min_align=8 R10=fp",
-			"5: R1=ctx R3=imm16,min_value=16,max_value=16,min_align=16 R10=fp",
-			"6: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R10=fp",
-			"7: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm32,min_value=32,max_value=32,min_align=32 R10=fp",
-			"8: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm16,min_value=16,max_value=16,min_align=16 R10=fp",
-			"9: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm8,min_value=8,max_value=8,min_align=8 R10=fp",
-			"10: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm4,min_value=4,max_value=4,min_align=4 R10=fp",
-			"11: R1=ctx R3=imm1,min_value=1,max_value=1,min_align=1 R4=imm2,min_value=2,max_value=2,min_align=2 R10=fp",
+			{1, "R1=ctx(id=0,off=0,imm=0)"},
+			{1, "R10=fp0"},
+			{1, "R3=inv1"},
+			{2, "R3=inv2"},
+			{3, "R3=inv4"},
+			{4, "R3=inv8"},
+			{5, "R3=inv16"},
+			{6, "R3=inv1"},
+			{7, "R4=inv32"},
+			{8, "R4=inv16"},
+			{9, "R4=inv8"},
+			{10, "R4=inv4"},
+			{11, "R4=inv2"},
 		},
 	},
 	{
@@ -106,12 +119,14 @@ static struct bpf_align_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.matches = {
-			"1: R1=ctx R3=imm4,min_value=4,max_value=4,min_align=4 R10=fp",
-			"2: R1=ctx R3=imm8,min_value=8,max_value=8,min_align=4 R10=fp",
-			"3: R1=ctx R3=imm10,min_value=10,max_value=10,min_align=2 R10=fp",
-			"4: R1=ctx R3=imm10,min_value=10,max_value=10,min_align=2 R4=imm8,min_value=8,max_value=8,min_align=8 R10=fp",
-			"5: R1=ctx R3=imm10,min_value=10,max_value=10,min_align=2 R4=imm12,min_value=12,max_value=12,min_align=4 R10=fp",
-			"6: R1=ctx R3=imm10,min_value=10,max_value=10,min_align=2 R4=imm14,min_value=14,max_value=14,min_align=2 R10=fp",
+			{1, "R1=ctx(id=0,off=0,imm=0)"},
+			{1, "R10=fp0"},
+			{1, "R3=inv4"},
+			{2, "R3=inv8"},
+			{3, "R3=inv10"},
+			{4, "R4=inv8"},
+			{5, "R4=inv12"},
+			{6, "R4=inv14"},
 		},
 	},
 	{
@@ -126,13 +141,16 @@ static struct bpf_align_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.matches = {
-			"1: R1=ctx R3=imm7,min_value=7,max_value=7,min_align=1 R10=fp",
-			"2: R1=ctx R3=imm7,min_value=7,max_value=7,min_align=1 R10=fp",
-			"3: R1=ctx R3=imm14,min_value=14,max_value=14,min_align=2 R10=fp",
-			"4: R1=ctx R3=imm56,min_value=56,max_value=56,min_align=4 R10=fp",
+			{1, "R1=ctx(id=0,off=0,imm=0)"},
+			{1, "R10=fp0"},
+			{1, "R3=inv7"},
+			{2, "R3=inv7"},
+			{3, "R3=inv14"},
+			{4, "R3=inv56"},
 		},
 	},
 
+	/* Tests using unknown values */
 #define PREP_PKT_POINTERS \
 	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, \
 		    offsetof(struct __sk_buff, data)), \
@@ -166,17 +184,19 @@ static struct bpf_align_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.matches = {
-			"7: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R10=fp",
-			"8: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv55,min_align=2 R10=fp",
-			"9: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv54,min_align=4 R10=fp",
-			"10: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv53,min_align=8 R10=fp",
-			"11: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv52,min_align=16 R10=fp",
-			"18: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv56 R10=fp",
-			"19: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv51,min_align=32 R10=fp",
-			"20: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv52,min_align=16 R10=fp",
-			"21: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv53,min_align=8 R10=fp",
-			"22: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv54,min_align=4 R10=fp",
-			"23: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv55,min_align=2 R10=fp",
+			{7, "R0=pkt(id=0,off=8,r=8,imm=0)"},
+			{7, "R3=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{8, "R3=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
+			{9, "R3=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{10, "R3=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
+			{11, "R3=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
+			{18, "R3=pkt_end(id=0,off=0,imm=0)"},
+			{18, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{19, "R4=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"},
+			{20, "R4=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
+			{21, "R4=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
+			{22, "R4=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{23, "R4=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
 		},
 	},
 	{
@@ -197,16 +217,16 @@ static struct bpf_align_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.matches = {
-			"7: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R10=fp",
-			"8: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv56 R10=fp",
-			"9: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv55,min_align=1 R10=fp",
-			"10: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv56 R10=fp",
-			"11: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv54,min_align=2 R10=fp",
-			"12: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv56 R10=fp",
-			"13: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv53,min_align=4 R10=fp",
-			"14: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv56 R10=fp",
-			"15: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv52,min_align=8 R10=fp",
-			"16: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=inv56 R4=inv50,min_align=8 R10=fp"
+			{7, "R3=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{8, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{9, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{10, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{11, "R4=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
+			{12, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{13, "R4=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{14, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{15, "R4=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
+			{16, "R4=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
 		},
 	},
 	{
@@ -237,12 +257,14 @@ static struct bpf_align_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.matches = {
-			"4: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R5=pkt(id=0,off=0,r=0) R10=fp",
-			"5: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R5=pkt(id=0,off=14,r=0) R10=fp",
-			"6: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=0) R3=pkt_end R4=pkt(id=0,off=14,r=0) R5=pkt(id=0,off=14,r=0) R10=fp",
-			"10: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=18) R3=pkt_end R4=inv56 R5=pkt(id=0,off=14,r=18) R10=fp",
-			"14: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=18) R3=pkt_end R4=inv48 R5=pkt(id=0,off=14,r=18) R10=fp",
-			"15: R0=imm0,min_value=0,max_value=0,min_align=2147483648 R1=ctx R2=pkt(id=0,off=0,r=18) R3=pkt_end R4=inv48 R5=pkt(id=0,off=14,r=18) R10=fp",
+			{4, "R5=pkt(id=0,off=0,r=0,imm=0)"},
+			{5, "R5=pkt(id=0,off=14,r=0,imm=0)"},
+			{6, "R4=pkt(id=0,off=14,r=0,imm=0)"},
+			{10, "R2=pkt(id=0,off=0,r=18,imm=0)"},
+			{10, "R5=pkt(id=0,off=14,r=18,imm=0)"},
+			{10, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{14, "R4=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
+			{15, "R4=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
 		},
 	},
 	{
@@ -297,62 +319,286 @@ static struct bpf_align_test tests[] = {
 			/* Calculated offset in R6 has unknown value, but known
 			 * alignment of 4.
 			 */
-			"8: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R6=inv54,min_align=4 R10=fp",
-
-			/* Offset is added to packet pointer R5, resulting in known
-			 * auxiliary alignment and offset.
+			{8, "R2=pkt(id=0,off=0,r=8,imm=0)"},
+			{8, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			/* Offset is added to packet pointer R5, resulting in
+			 * known fixed offset, and variable offset from R6.
 			 */
-			"11: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R5=pkt(id=1,off=0,r=0),aux_off=14,aux_off_align=4 R6=inv54,min_align=4 R10=fp",
-
+			{11, "R5=pkt(id=1,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
 			/* At the time the word size load is performed from R5,
 			 * it's total offset is NET_IP_ALIGN + reg->off (0) +
 			 * reg->aux_off (14) which is 16.  Then the variable
 			 * offset is considered using reg->aux_off_align which
 			 * is 4 and meets the load's requirements.
 			 */
-			"15: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=pkt(id=1,off=4,r=4),aux_off=14,aux_off_align=4 R5=pkt(id=1,off=0,r=4),aux_off=14,aux_off_align=4 R6=inv54,min_align=4 R10=fp",
-
-
+			{15, "R4=pkt(id=1,off=18,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{15, "R5=pkt(id=1,off=14,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
 			/* Variable offset is added to R5 packet pointer,
 			 * resulting in auxiliary alignment of 4.
 			 */
-			"18: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off=14,aux_off_align=4 R5=pkt(id=2,off=0,r=0),aux_off_align=4 R6=inv54,min_align=4 R10=fp",
-
+			{18, "R5=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
 			/* Constant offset is added to R5, resulting in
 			 * reg->off of 14.
 			 */
-			"19: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off=14,aux_off_align=4 R5=pkt(id=2,off=14,r=0),aux_off_align=4 R6=inv54,min_align=4 R10=fp",
-
+			{19, "R5=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
 			/* At the time the word size load is performed from R5,
-			 * it's total offset is NET_IP_ALIGN + reg->off (14) which
-			 * is 16.  Then the variable offset is considered using
-			 * reg->aux_off_align which is 4 and meets the load's
-			 * requirements.
+			 * its total fixed offset is NET_IP_ALIGN + reg->off
+			 * (14) which is 16.  Then the variable offset is 4-byte
+			 * aligned, so the total offset is 4-byte aligned and
+			 * meets the load's requirements.
 			 */
-			"23: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=pkt(id=2,off=18,r=18),aux_off_align=4 R5=pkt(id=2,off=14,r=18),aux_off_align=4 R6=inv54,min_align=4 R10=fp",
-
+			{23, "R4=pkt(id=2,off=18,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{23, "R5=pkt(id=2,off=14,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
 			/* Constant offset is added to R5 packet pointer,
 			 * resulting in reg->off value of 14.
 			 */
-			"26: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off_align=4 R5=pkt(id=0,off=14,r=8) R6=inv54,min_align=4 R10=fp",
-			/* Variable offset is added to R5, resulting in an
-			 * auxiliary offset of 14, and an auxiliary alignment of 4.
+			{26, "R5=pkt(id=0,off=14,r=8"},
+			/* Variable offset is added to R5, resulting in a
+			 * variable offset of (4n).
 			 */
-			"27: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off_align=4 R5=pkt(id=3,off=0,r=0),aux_off=14,aux_off_align=4 R6=inv54,min_align=4 R10=fp",
-			/* Constant is added to R5 again, setting reg->off to 4. */
-			"28: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off_align=4 R5=pkt(id=3,off=4,r=0),aux_off=14,aux_off_align=4 R6=inv54,min_align=4 R10=fp",
-			/* And once more we add a variable, which causes an accumulation
-			 * of reg->off into reg->aux_off_align, with resulting value of
-			 * 18.  The auxiliary alignment stays at 4.
+			{27, "R5=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			/* Constant is added to R5 again, setting reg->off to 18. */
+			{28, "R5=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			/* And once more we add a variable; resulting var_off
+			 * is still (4n), fixed offset is not changed.
+			 * Also, we create a new reg->id.
 			 */
-			"29: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=inv,aux_off_align=4 R5=pkt(id=4,off=0,r=0),aux_off=18,aux_off_align=4 R6=inv54,min_align=4 R10=fp",
+			{29, "R5=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc))"},
 			/* At the time the word size load is performed from R5,
-			 * it's total offset is NET_IP_ALIGN + reg->off (0) +
-			 * reg->aux_off (18) which is 20.  Then the variable offset
-			 * is considered using reg->aux_off_align which is 4 and meets
-			 * the load's requirements.
+			 * its total fixed offset is NET_IP_ALIGN + reg->off (18)
+			 * which is 20.  Then the variable offset is (4n), so
+			 * the total offset is 4-byte aligned and meets the
+			 * load's requirements.
+			 */
+			{33, "R4=pkt(id=4,off=22,r=22,umax_value=2040,var_off=(0x0; 0x7fc))"},
+			{33, "R5=pkt(id=4,off=18,r=22,umax_value=2040,var_off=(0x0; 0x7fc))"},
+		},
+	},
+	{
+		.descr = "packet variable offset 2",
+		.insns = {
+			/* Create an unknown offset, (4n+2)-aligned */
+			LOAD_UNKNOWN(BPF_REG_6),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 14),
+			/* Add it to the packet pointer */
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+			/* Check bounds and perform a read */
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
+			/* Make a (4n) offset from the value we just read */
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 0xff),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
+			/* Add it to the packet pointer */
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+			/* Check bounds and perform a read */
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			/* Calculated offset in R6 has unknown value, but known
+			 * alignment of 4.
+			 */
+			{8, "R2=pkt(id=0,off=0,r=8,imm=0)"},
+			{8, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			/* Adding 14 makes R6 be (4n+2) */
+			{9, "R6=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+			/* Packet pointer has (4n+2) offset */
+			{11, "R5=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+			{13, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+			/* At the time the word size load is performed from R5,
+			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
+			 * which is 2.  Then the variable offset is (4n+2), so
+			 * the total offset is 4-byte aligned and meets the
+			 * load's requirements.
+			 */
+			{15, "R5=pkt(id=1,off=0,r=4,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+			/* Newly read value in R6 was shifted left by 2, so has
+			 * known alignment of 4.
+			 */
+			{18, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			/* Added (4n) to packet pointer's (4n+2) var_off, giving
+			 * another (4n+2).
+			 */
+			{19, "R5=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
+			{21, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
+			/* At the time the word size load is performed from R5,
+			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
+			 * which is 2.  Then the variable offset is (4n+2), so
+			 * the total offset is 4-byte aligned and meets the
+			 * load's requirements.
+			 */
+			{23, "R5=pkt(id=2,off=0,r=4,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
+		},
+	},
+	{
+		.descr = "dubious pointer arithmetic",
+		.insns = {
+			PREP_PKT_POINTERS,
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			/* ptr & const => unknown & const */
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_5, 0x40),
+			/* ptr << const => unknown << const */
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_5, 2),
+			/* We have a (4n) value.  Let's make a packet offset
+			 * out of it.  First add 14, to make it a (4n+2)
+			 */
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
+			/* Then make sure it's nonnegative */
+			BPF_JMP_IMM(BPF_JSGE, BPF_REG_5, 0, 1),
+			BPF_EXIT_INSN(),
+			/* Add it to packet pointer */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5),
+			/* Check bounds and perform a read */
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_6, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = REJECT,
+		.matches = {
+			{4, "R5=pkt(id=0,off=0,r=0,imm=0)"},
+			/* ptr & 0x40 == either 0 or 0x40 */
+			{5, "R5=inv(id=0,umax_value=64,var_off=(0x0; 0x40))"},
+			/* ptr << 2 == unknown, (4n) */
+			{7, "R5=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"},
+			/* (4n) + 14 == (4n+2).  We blow our bounds, because
+			 * the add could overflow.
+			 */
+			{8, "R5=inv(id=0,var_off=(0x2; 0xfffffffffffffffc))"},
+			/* Checked s>=0 */
+			{10, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+			/* packet pointer + nonnegative (4n+2) */
+			{12, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+			{14, "R4=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+			/* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
+			 * We checked the bounds, but it might have been able
+			 * to overflow if the packet pointer started in the
+			 * upper half of the address space.
+			 * So we did not get a 'range' on R6, and the access
+			 * attempt will fail.
+			 */
+			{16, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+		}
+	},
+	{
+		.descr = "variable subtraction",
+		.insns = {
+			/* Create an unknown offset, (4n+2)-aligned */
+			LOAD_UNKNOWN(BPF_REG_6),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 14),
+			/* Create another unknown, (4n)-aligned, and subtract
+			 * it from the first one
+			 */
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_7, 2),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_6, BPF_REG_7),
+			/* Bounds-check the result */
+			BPF_JMP_IMM(BPF_JSGE, BPF_REG_6, 0, 1),
+			BPF_EXIT_INSN(),
+			/* Add it to the packet pointer */
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+			/* Check bounds and perform a read */
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			/* Calculated offset in R6 has unknown value, but known
+			 * alignment of 4.
+			 */
+			{7, "R2=pkt(id=0,off=0,r=8,imm=0)"},
+			{9, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			/* Adding 14 makes R6 be (4n+2) */
+			{10, "R6=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+			/* New unknown value in R7 is (4n) */
+			{11, "R7=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			/* Subtracting it from R6 blows our unsigned bounds */
+			{12, "R6=inv(id=0,smin_value=-1006,smax_value=1034,var_off=(0x2; 0xfffffffffffffffc))"},
+			/* Checked s>= 0 */
+			{14, "R6=inv(id=0,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"},
+			/* At the time the word size load is performed from R5,
+			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
+			 * which is 2.  Then the variable offset is (4n+2), so
+			 * the total offset is 4-byte aligned and meets the
+			 * load's requirements.
+			 */
+			{20, "R5=pkt(id=1,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"},
+		},
+	},
+	{
+		.descr = "pointer variable subtraction",
+		.insns = {
+			/* Create an unknown offset, (4n+2)-aligned and bounded
+			 * to [14,74]
+			 */
+			LOAD_UNKNOWN(BPF_REG_6),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 0xf),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 14),
+			/* Subtract it from the packet pointer */
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_5, BPF_REG_6),
+			/* Create another unknown, (4n)-aligned and >= 74.
+			 * That in fact means >= 76, since 74 % 4 == 2
 			 */
-			"33: R0=pkt(id=0,off=8,r=8) R1=ctx R2=pkt(id=0,off=0,r=8) R3=pkt_end R4=pkt(id=4,off=4,r=4),aux_off=18,aux_off_align=4 R5=pkt(id=4,off=0,r=4),aux_off=18,aux_off_align=4 R6=inv54,min_align=4 R10=fp",
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_7, 2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 76),
+			/* Add it to the packet pointer */
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_7),
+			/* Check bounds and perform a read */
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			/* Calculated offset in R6 has unknown value, but known
+			 * alignment of 4.
+			 */
+			{7, "R2=pkt(id=0,off=0,r=8,imm=0)"},
+			{10, "R6=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"},
+			/* Adding 14 makes R6 be (4n+2) */
+			{11, "R6=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"},
+			/* Subtracting from packet pointer overflows ubounds */
+			{13, "R5=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c))"},
+			/* New unknown value in R7 is (4n), >= 76 */
+			{15, "R7=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"},
+			/* Adding it to packet pointer gives nice bounds again */
+			{16, "R5=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"},
+			/* At the time the word size load is performed from R5,
+			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
+			 * which is 2.  Then the variable offset is (4n+2), so
+			 * the total offset is 4-byte aligned and meets the
+			 * load's requirements.
+			 */
+			{20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"},
 		},
 	},
 };
@@ -373,6 +619,9 @@ static int do_test_single(struct bpf_align_test *test)
 {
 	struct bpf_insn *prog = test->insns;
 	int prog_type = test->prog_type;
+	char bpf_vlog_copy[32768];
+	const char *line_ptr;
+	int cur_line = -1;
 	int prog_len, i;
 	int fd_prog;
 	int ret;
@@ -381,26 +630,49 @@ static int do_test_single(struct bpf_align_test *test)
 	fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER,
 				     prog, prog_len, 1, "GPL", 0,
 				     bpf_vlog, sizeof(bpf_vlog), 2);
-	if (fd_prog < 0) {
+	if (fd_prog < 0 && test->result != REJECT) {
 		printf("Failed to load program.\n");
 		printf("%s", bpf_vlog);
 		ret = 1;
+	} else if (fd_prog >= 0 && test->result == REJECT) {
+		printf("Unexpected success to load!\n");
+		printf("%s", bpf_vlog);
+		ret = 1;
+		close(fd_prog);
 	} else {
 		ret = 0;
+		/* We make a local copy so that we can strtok() it */
+		strncpy(bpf_vlog_copy, bpf_vlog, sizeof(bpf_vlog_copy));
+		line_ptr = strtok(bpf_vlog_copy, "\n");
 		for (i = 0; i < MAX_MATCHES; i++) {
-			const char *t, *m = test->matches[i];
+			struct bpf_reg_match m = test->matches[i];
 
-			if (!m)
+			if (!m.match)
 				break;
-			t = strstr(bpf_vlog, m);
-			if (!t) {
-				printf("Failed to find match: %s\n", m);
+			while (line_ptr) {
+				cur_line = -1;
+				sscanf(line_ptr, "%u: ", &cur_line);
+				if (cur_line == m.line)
+					break;
+				line_ptr = strtok(NULL, "\n");
+			}
+			if (!line_ptr) {
+				printf("Failed to find line %u for match: %s\n",
+				       m.line, m.match);
+				ret = 1;
+				printf("%s", bpf_vlog);
+				break;
+			}
+			if (!strstr(line_ptr, m.match)) {
+				printf("Failed to find match %u: %s\n",
+				       m.line, m.match);
 				ret = 1;
 				printf("%s", bpf_vlog);
 				break;
 			}
 		}
-		close(fd_prog);
+		if (fd_prog >= 0)
+			close(fd_prog);
 	}
 	return ret;
 }
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 79601c81e169..fe3a443a1102 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -22,6 +22,7 @@
 #include <linux/bpf.h>
 
 #include <bpf/bpf.h>
+#include <bpf/libbpf.h>
 #include "bpf_util.h"
 
 static int map_flags;
@@ -438,6 +439,444 @@ static void test_arraymap_percpu_many_keys(void)
 	close(fd);
 }
 
+static void test_devmap(int task, void *data)
+{
+	int fd;
+	__u32 key, value;
+
+	fd = bpf_create_map(BPF_MAP_TYPE_DEVMAP, sizeof(key), sizeof(value),
+			    2, 0);
+	if (fd < 0) {
+		printf("Failed to create arraymap '%s'!\n", strerror(errno));
+		exit(1);
+	}
+
+	close(fd);
+}
+
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <arpa/inet.h>
+#include <sys/select.h>
+#include <linux/err.h>
+#define SOCKMAP_PARSE_PROG "./sockmap_parse_prog.o"
+#define SOCKMAP_VERDICT_PROG "./sockmap_verdict_prog.o"
+static void test_sockmap(int tasks, void *data)
+{
+	int one = 1, map_fd_rx, map_fd_tx, map_fd_break, s, sc, rc;
+	struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_break;
+	int ports[] = {50200, 50201, 50202, 50204};
+	int err, i, fd, sfd[6] = {0xdeadbeef};
+	u8 buf[20] = {0x0, 0x5, 0x3, 0x2, 0x1, 0x0};
+	int parse_prog, verdict_prog;
+	struct sockaddr_in addr;
+	struct bpf_object *obj;
+	struct timeval to;
+	__u32 key, value;
+	pid_t pid[tasks];
+	fd_set w;
+
+	/* Create some sockets to use with sockmap */
+	for (i = 0; i < 2; i++) {
+		sfd[i] = socket(AF_INET, SOCK_STREAM, 0);
+		if (sfd[i] < 0)
+			goto out;
+		err = setsockopt(sfd[i], SOL_SOCKET, SO_REUSEADDR,
+				 (char *)&one, sizeof(one));
+		if (err) {
+			printf("failed to setsockopt\n");
+			goto out;
+		}
+		err = ioctl(sfd[i], FIONBIO, (char *)&one);
+		if (err < 0) {
+			printf("failed to ioctl\n");
+			goto out;
+		}
+		memset(&addr, 0, sizeof(struct sockaddr_in));
+		addr.sin_family = AF_INET;
+		addr.sin_addr.s_addr = inet_addr("127.0.0.1");
+		addr.sin_port = htons(ports[i]);
+		err = bind(sfd[i], (struct sockaddr *)&addr, sizeof(addr));
+		if (err < 0) {
+			printf("failed to bind: err %i: %i:%i\n",
+			       err, i, sfd[i]);
+			goto out;
+		}
+		err = listen(sfd[i], 32);
+		if (err < 0) {
+			printf("failed to listen\n");
+			goto out;
+		}
+	}
+
+	for (i = 2; i < 4; i++) {
+		sfd[i] = socket(AF_INET, SOCK_STREAM, 0);
+		if (sfd[i] < 0)
+			goto out;
+		err = setsockopt(sfd[i], SOL_SOCKET, SO_REUSEADDR,
+				 (char *)&one, sizeof(one));
+		if (err) {
+			printf("set sock opt\n");
+			goto out;
+		}
+		memset(&addr, 0, sizeof(struct sockaddr_in));
+		addr.sin_family = AF_INET;
+		addr.sin_addr.s_addr = inet_addr("127.0.0.1");
+		addr.sin_port = htons(ports[i - 2]);
+		err = connect(sfd[i], (struct sockaddr *)&addr, sizeof(addr));
+		if (err) {
+			printf("failed to connect\n");
+			goto out;
+		}
+	}
+
+
+	for (i = 4; i < 6; i++) {
+		sfd[i] = accept(sfd[i - 4], NULL, NULL);
+		if (sfd[i] < 0) {
+			printf("accept failed\n");
+			goto out;
+		}
+	}
+
+	/* Test sockmap with connected sockets */
+	fd = bpf_create_map(BPF_MAP_TYPE_SOCKMAP,
+			    sizeof(key), sizeof(value),
+			    6, 0);
+	if (fd < 0) {
+		printf("Failed to create sockmap %i\n", fd);
+		goto out_sockmap;
+	}
+
+	/* Test update without programs */
+	for (i = 0; i < 6; i++) {
+		err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
+		if (err) {
+			printf("Failed noprog update sockmap '%i:%i'\n",
+			       i, sfd[i]);
+			goto out_sockmap;
+		}
+	}
+
+	/* Test attaching/detaching bad fds */
+	err = bpf_prog_attach(-1, fd, BPF_SK_SKB_STREAM_PARSER, 0);
+	if (!err) {
+		printf("Failed invalid parser prog attach\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_prog_attach(-1, fd, BPF_SK_SKB_STREAM_VERDICT, 0);
+	if (!err) {
+		printf("Failed invalid verdict prog attach\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_prog_attach(-1, fd, __MAX_BPF_ATTACH_TYPE, 0);
+	if (!err) {
+		printf("Failed unknown prog attach\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_prog_detach(fd, BPF_SK_SKB_STREAM_PARSER);
+	if (err) {
+		printf("Failed empty parser prog detach\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_prog_detach(fd, BPF_SK_SKB_STREAM_VERDICT);
+	if (err) {
+		printf("Failed empty verdict prog detach\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_prog_detach(fd, __MAX_BPF_ATTACH_TYPE);
+	if (!err) {
+		printf("Detach invalid prog successful\n");
+		goto out_sockmap;
+	}
+
+	/* Load SK_SKB program and Attach */
+	err = bpf_prog_load(SOCKMAP_PARSE_PROG,
+			    BPF_PROG_TYPE_SK_SKB, &obj, &parse_prog);
+	if (err) {
+		printf("Failed to load SK_SKB parse prog\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_prog_load(SOCKMAP_VERDICT_PROG,
+			    BPF_PROG_TYPE_SK_SKB, &obj, &verdict_prog);
+	if (err) {
+		printf("Failed to load SK_SKB verdict prog\n");
+		goto out_sockmap;
+	}
+
+	bpf_map_rx = bpf_object__find_map_by_name(obj, "sock_map_rx");
+	if (IS_ERR(bpf_map_rx)) {
+		printf("Failed to load map rx from verdict prog\n");
+		goto out_sockmap;
+	}
+
+	map_fd_rx = bpf_map__fd(bpf_map_rx);
+	if (map_fd_rx < 0) {
+		printf("Failed to get map fd\n");
+		goto out_sockmap;
+	}
+
+	bpf_map_tx = bpf_object__find_map_by_name(obj, "sock_map_tx");
+	if (IS_ERR(bpf_map_tx)) {
+		printf("Failed to load map tx from verdict prog\n");
+		goto out_sockmap;
+	}
+
+	map_fd_tx = bpf_map__fd(bpf_map_tx);
+	if (map_fd_tx < 0) {
+		printf("Failed to get map tx fd\n");
+		goto out_sockmap;
+	}
+
+	bpf_map_break = bpf_object__find_map_by_name(obj, "sock_map_break");
+	if (IS_ERR(bpf_map_break)) {
+		printf("Failed to load map tx from verdict prog\n");
+		goto out_sockmap;
+	}
+
+	map_fd_break = bpf_map__fd(bpf_map_break);
+	if (map_fd_break < 0) {
+		printf("Failed to get map tx fd\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_prog_attach(parse_prog, map_fd_break,
+			      BPF_SK_SKB_STREAM_PARSER, 0);
+	if (!err) {
+		printf("Allowed attaching SK_SKB program to invalid map\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_prog_attach(parse_prog, map_fd_rx,
+		      BPF_SK_SKB_STREAM_PARSER, 0);
+	if (err) {
+		printf("Failed stream parser bpf prog attach\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_prog_attach(verdict_prog, map_fd_rx,
+			      BPF_SK_SKB_STREAM_VERDICT, 0);
+	if (err) {
+		printf("Failed stream verdict bpf prog attach\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_prog_attach(verdict_prog, map_fd_rx,
+			      __MAX_BPF_ATTACH_TYPE, 0);
+	if (!err) {
+		printf("Attached unknown bpf prog\n");
+		goto out_sockmap;
+	}
+
+	/* Test map update elem afterwards fd lives in fd and map_fd */
+	for (i = 0; i < 6; i++) {
+		err = bpf_map_update_elem(map_fd_rx, &i, &sfd[i], BPF_ANY);
+		if (err) {
+			printf("Failed map_fd_rx update sockmap %i '%i:%i'\n",
+			       err, i, sfd[i]);
+			goto out_sockmap;
+		}
+		err = bpf_map_update_elem(map_fd_tx, &i, &sfd[i], BPF_ANY);
+		if (err) {
+			printf("Failed map_fd_tx update sockmap %i '%i:%i'\n",
+			       err, i, sfd[i]);
+			goto out_sockmap;
+		}
+	}
+
+	/* Test map delete elem and remove send/recv sockets */
+	for (i = 2; i < 4; i++) {
+		err = bpf_map_delete_elem(map_fd_rx, &i);
+		if (err) {
+			printf("Failed delete sockmap rx %i '%i:%i'\n",
+			       err, i, sfd[i]);
+			goto out_sockmap;
+		}
+		err = bpf_map_delete_elem(map_fd_tx, &i);
+		if (err) {
+			printf("Failed delete sockmap tx %i '%i:%i'\n",
+			       err, i, sfd[i]);
+			goto out_sockmap;
+		}
+	}
+
+	/* Test map send/recv */
+	for (i = 0; i < 2; i++) {
+		buf[0] = i;
+		buf[1] = 0x5;
+		sc = send(sfd[2], buf, 20, 0);
+		if (sc < 0) {
+			printf("Failed sockmap send\n");
+			goto out_sockmap;
+		}
+
+		FD_ZERO(&w);
+		FD_SET(sfd[3], &w);
+		to.tv_sec = 1;
+		to.tv_usec = 0;
+		s = select(sfd[3] + 1, &w, NULL, NULL, &to);
+		if (s == -1) {
+			perror("Failed sockmap select()");
+			goto out_sockmap;
+		} else if (!s) {
+			printf("Failed sockmap unexpected timeout\n");
+			goto out_sockmap;
+		}
+
+		if (!FD_ISSET(sfd[3], &w)) {
+			printf("Failed sockmap select/recv\n");
+			goto out_sockmap;
+		}
+
+		rc = recv(sfd[3], buf, sizeof(buf), 0);
+		if (rc < 0) {
+			printf("Failed sockmap recv\n");
+			goto out_sockmap;
+		}
+	}
+
+	/* Negative null entry lookup from datapath should be dropped */
+	buf[0] = 1;
+	buf[1] = 12;
+	sc = send(sfd[2], buf, 20, 0);
+	if (sc < 0) {
+		printf("Failed sockmap send\n");
+		goto out_sockmap;
+	}
+
+	/* Push fd into same slot */
+	i = 2;
+	err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_NOEXIST);
+	if (!err) {
+		printf("Failed allowed sockmap dup slot BPF_NOEXIST\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
+	if (err) {
+		printf("Failed sockmap update new slot BPF_ANY\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_EXIST);
+	if (err) {
+		printf("Failed sockmap update new slot BPF_EXIST\n");
+		goto out_sockmap;
+	}
+
+	/* Delete the elems without programs */
+	for (i = 0; i < 6; i++) {
+		err = bpf_map_delete_elem(fd, &i);
+		if (err) {
+			printf("Failed delete sockmap %i '%i:%i'\n",
+			       err, i, sfd[i]);
+		}
+	}
+
+	/* Test having multiple maps open and set with programs on same fds */
+	err = bpf_prog_attach(parse_prog, fd,
+			      BPF_SK_SKB_STREAM_PARSER, 0);
+	if (err) {
+		printf("Failed fd bpf parse prog attach\n");
+		goto out_sockmap;
+	}
+	err = bpf_prog_attach(verdict_prog, fd,
+			      BPF_SK_SKB_STREAM_VERDICT, 0);
+	if (err) {
+		printf("Failed fd bpf verdict prog attach\n");
+		goto out_sockmap;
+	}
+
+	for (i = 4; i < 6; i++) {
+		err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
+		if (!err) {
+			printf("Failed allowed duplicate programs in update ANY sockmap %i '%i:%i'\n",
+			       err, i, sfd[i]);
+			goto out_sockmap;
+		}
+		err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_NOEXIST);
+		if (!err) {
+			printf("Failed allowed duplicate program in update NOEXIST sockmap  %i '%i:%i'\n",
+			       err, i, sfd[i]);
+			goto out_sockmap;
+		}
+		err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_EXIST);
+		if (!err) {
+			printf("Failed allowed duplicate program in update EXIST sockmap  %i '%i:%i'\n",
+			       err, i, sfd[i]);
+			goto out_sockmap;
+		}
+	}
+
+	/* Test tasks number of forked operations */
+	for (i = 0; i < tasks; i++) {
+		pid[i] = fork();
+		if (pid[i] == 0) {
+			for (i = 0; i < 6; i++) {
+				bpf_map_delete_elem(map_fd_tx, &i);
+				bpf_map_delete_elem(map_fd_rx, &i);
+				bpf_map_update_elem(map_fd_tx, &i,
+						    &sfd[i], BPF_ANY);
+				bpf_map_update_elem(map_fd_rx, &i,
+						    &sfd[i], BPF_ANY);
+			}
+			exit(0);
+		} else if (pid[i] == -1) {
+			printf("Couldn't spawn #%d process!\n", i);
+			exit(1);
+		}
+	}
+
+	for (i = 0; i < tasks; i++) {
+		int status;
+
+		assert(waitpid(pid[i], &status, 0) == pid[i]);
+		assert(status == 0);
+	}
+
+	err = bpf_prog_detach(map_fd_rx, __MAX_BPF_ATTACH_TYPE);
+	if (!err) {
+		printf("Detached an invalid prog type.\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_prog_detach(map_fd_rx, BPF_SK_SKB_STREAM_PARSER);
+	if (err) {
+		printf("Failed parser prog detach\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_prog_detach(map_fd_rx, BPF_SK_SKB_STREAM_VERDICT);
+	if (err) {
+		printf("Failed parser prog detach\n");
+		goto out_sockmap;
+	}
+
+	/* Test map close sockets */
+	for (i = 0; i < 6; i++)
+		close(sfd[i]);
+	close(fd);
+	close(map_fd_rx);
+	bpf_object__close(obj);
+	return;
+out:
+	for (i = 0; i < 6; i++)
+		close(sfd[i]);
+	printf("Failed to create sockmap '%i:%s'!\n", i, strerror(errno));
+	exit(1);
+out_sockmap:
+	for (i = 0; i < 6; i++)
+		close(sfd[i]);
+	close(fd);
+	exit(1);
+}
+
 #define MAP_SIZE (32 * 1024)
 
 static void test_map_large(void)
@@ -605,6 +1044,9 @@ static void run_all_tests(void)
 
 	test_arraymap_percpu_many_keys();
 
+	test_devmap(0, NULL);
+	test_sockmap(0, NULL);
+
 	test_map_large();
 	test_map_parallel();
 	test_map_stress();
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 1f7dd35551b9..11ee25cea227 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -75,39 +75,6 @@ static struct {
 	__ret;								\
 })
 
-static int bpf_prog_load(const char *file, enum bpf_prog_type type,
-			 struct bpf_object **pobj, int *prog_fd)
-{
-	struct bpf_program *prog;
-	struct bpf_object *obj;
-	int err;
-
-	obj = bpf_object__open(file);
-	if (IS_ERR(obj)) {
-		error_cnt++;
-		return -ENOENT;
-	}
-
-	prog = bpf_program__next(NULL, obj);
-	if (!prog) {
-		bpf_object__close(obj);
-		error_cnt++;
-		return -ENOENT;
-	}
-
-	bpf_program__set_type(prog, type);
-	err = bpf_object__load(obj);
-	if (err) {
-		bpf_object__close(obj);
-		error_cnt++;
-		return -EINVAL;
-	}
-
-	*pobj = obj;
-	*prog_fd = bpf_program__fd(prog);
-	return 0;
-}
-
 static int bpf_find_map(const char *test, struct bpf_object *obj,
 			const char *name)
 {
@@ -130,8 +97,10 @@ static void test_pkt_access(void)
 	int err, prog_fd;
 
 	err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
-	if (err)
+	if (err) {
+		error_cnt++;
 		return;
+	}
 
 	err = bpf_prog_test_run(prog_fd, 100000, &pkt_v4, sizeof(pkt_v4),
 				NULL, NULL, &retval, &duration);
@@ -162,8 +131,10 @@ static void test_xdp(void)
 	int err, prog_fd, map_fd;
 
 	err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
-	if (err)
+	if (err) {
+		error_cnt++;
 		return;
+	}
 
 	map_fd = bpf_find_map(__func__, obj, "vip2tnl");
 	if (map_fd < 0)
@@ -223,8 +194,10 @@ static void test_l4lb(void)
 	u32 *magic = (u32 *)buf;
 
 	err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
-	if (err)
+	if (err) {
+		error_cnt++;
 		return;
+	}
 
 	map_fd = bpf_find_map(__func__, obj, "vip_map");
 	if (map_fd < 0)
@@ -280,8 +253,10 @@ static void test_tcp_estats(void)
 
 	err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
 	CHECK(err, "", "err %d errno %d\n", err, errno);
-	if (err)
+	if (err) {
+		error_cnt++;
 		return;
+	}
 
 	bpf_object__close(obj);
 }
@@ -304,7 +279,7 @@ static void test_bpf_obj_id(void)
 	/* +1 to test for the info_len returned by kernel */
 	struct bpf_prog_info prog_infos[nr_iters + 1];
 	struct bpf_map_info map_infos[nr_iters + 1];
-	char jited_insns[128], xlated_insns[128];
+	char jited_insns[128], xlated_insns[128], zeros[128];
 	__u32 i, next_id, info_len, nr_id_found, duration = 0;
 	int sysctl_fd, jit_enabled = 0, err = 0;
 	__u64 array_value;
@@ -330,17 +305,22 @@ static void test_bpf_obj_id(void)
 		objs[i] = NULL;
 
 	/* Check bpf_obj_get_info_by_fd() */
+	bzero(zeros, sizeof(zeros));
 	for (i = 0; i < nr_iters; i++) {
 		err = bpf_prog_load(file, BPF_PROG_TYPE_SOCKET_FILTER,
 				    &objs[i], &prog_fds[i]);
 		/* test_obj_id.o is a dumb prog. It should never fail
 		 * to load.
 		 */
+		if (err)
+			error_cnt++;
 		assert(!err);
 
 		/* Check getting prog info */
 		info_len = sizeof(struct bpf_prog_info) * 2;
 		bzero(&prog_infos[i], info_len);
+		bzero(jited_insns, sizeof(jited_insns));
+		bzero(xlated_insns, sizeof(xlated_insns));
 		prog_infos[i].jited_prog_insns = ptr_to_u64(jited_insns);
 		prog_infos[i].jited_prog_len = sizeof(jited_insns);
 		prog_infos[i].xlated_prog_insns = ptr_to_u64(xlated_insns);
@@ -351,15 +331,20 @@ static void test_bpf_obj_id(void)
 			  prog_infos[i].type != BPF_PROG_TYPE_SOCKET_FILTER ||
 			  info_len != sizeof(struct bpf_prog_info) ||
 			  (jit_enabled && !prog_infos[i].jited_prog_len) ||
-			  !prog_infos[i].xlated_prog_len,
+			  (jit_enabled &&
+			   !memcmp(jited_insns, zeros, sizeof(zeros))) ||
+			  !prog_infos[i].xlated_prog_len ||
+			  !memcmp(xlated_insns, zeros, sizeof(zeros)),
 			  "get-prog-info(fd)",
-			  "err %d errno %d i %d type %d(%d) info_len %u(%lu) jit_enabled %d jited_prog_len %u xlated_prog_len %u\n",
+			  "err %d errno %d i %d type %d(%d) info_len %u(%lu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d\n",
 			  err, errno, i,
 			  prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER,
 			  info_len, sizeof(struct bpf_prog_info),
 			  jit_enabled,
 			  prog_infos[i].jited_prog_len,
-			  prog_infos[i].xlated_prog_len))
+			  prog_infos[i].xlated_prog_len,
+			  !!memcmp(jited_insns, zeros, sizeof(zeros)),
+			  !!memcmp(xlated_insns, zeros, sizeof(zeros))))
 			goto done;
 
 		map_fds[i] = bpf_find_map(__func__, objs[i], "test_map_id");
@@ -496,8 +481,10 @@ static void test_pkt_md_access(void)
 	int err, prog_fd;
 
 	err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
-	if (err)
+	if (err) {
+		error_cnt++;
 		return;
+	}
 
 	err = bpf_prog_test_run(prog_fd, 10, &pkt_v4, sizeof(pkt_v4),
 				NULL, NULL, &retval, &duration);
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index d3ed7324105e..26f3250bdcd2 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -422,7 +422,7 @@ static struct bpf_test tests[] = {
 			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.errstr_unpriv = "R1 pointer arithmetic",
+		.errstr_unpriv = "R1 subtraction from stack pointer",
 		.result_unpriv = REJECT,
 		.errstr = "R1 invalid mem access",
 		.result = REJECT,
@@ -604,8 +604,9 @@ static struct bpf_test tests[] = {
 			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, -4),
 			BPF_EXIT_INSN(),
 		},
-		.errstr = "misaligned access",
+		.errstr = "misaligned stack access",
 		.result = REJECT,
+		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
 	},
 	{
 		"invalid map_fd for function call",
@@ -651,8 +652,9 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map1 = { 3 },
-		.errstr = "misaligned access",
+		.errstr = "misaligned value access",
 		.result = REJECT,
+		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
 	},
 	{
 		"sometimes access memory with incorrect alignment",
@@ -673,6 +675,7 @@ static struct bpf_test tests[] = {
 		.errstr = "R0 invalid mem access",
 		.errstr_unpriv = "R0 leaks addr",
 		.result = REJECT,
+		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
 	},
 	{
 		"jump test 1",
@@ -964,6 +967,256 @@ static struct bpf_test tests[] = {
 		.result = REJECT,
 	},
 	{
+		"invalid access __sk_buff family",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, family)),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"invalid access __sk_buff remote_ip4",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, remote_ip4)),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"invalid access __sk_buff local_ip4",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, local_ip4)),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"invalid access __sk_buff remote_ip6",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, remote_ip6)),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"invalid access __sk_buff local_ip6",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, local_ip6)),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"invalid access __sk_buff remote_port",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, remote_port)),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"invalid access __sk_buff remote_port",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, local_port)),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"valid access __sk_buff family",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, family)),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_SKB,
+	},
+	{
+		"valid access __sk_buff remote_ip4",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, remote_ip4)),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_SKB,
+	},
+	{
+		"valid access __sk_buff local_ip4",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, local_ip4)),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_SKB,
+	},
+	{
+		"valid access __sk_buff remote_ip6",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, remote_ip6[0])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, remote_ip6[1])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, remote_ip6[2])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, remote_ip6[3])),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_SKB,
+	},
+	{
+		"valid access __sk_buff local_ip6",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, local_ip6[0])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, local_ip6[1])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, local_ip6[2])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, local_ip6[3])),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_SKB,
+	},
+	{
+		"valid access __sk_buff remote_port",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, remote_port)),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_SKB,
+	},
+	{
+		"valid access __sk_buff remote_port",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, local_port)),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_SKB,
+	},
+	{
+		"invalid access of tc_classid for SK_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, tc_classid)),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SK_SKB,
+		.errstr = "invalid bpf_context access",
+	},
+	{
+		"check skb->mark is writeable by SK_SKB",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_SKB,
+	},
+	{
+		"check skb->tc_index is writeable by SK_SKB",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, tc_index)),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_SKB,
+	},
+	{
+		"check skb->priority is writeable by SK_SKB",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, priority)),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_SKB,
+	},
+	{
+		"direct packet read for SK_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_SKB,
+	},
+	{
+		"direct packet write for SK_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_SKB,
+	},
+	{
+		"overlapping checks for direct packet access SK_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_SKB,
+	},
+	{
 		"check skb->mark is not writeable by sockets",
 		.insns = {
 			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
@@ -1216,8 +1469,9 @@ static struct bpf_test tests[] = {
 				    offsetof(struct __sk_buff, cb[0]) + 1),
 			BPF_EXIT_INSN(),
 		},
-		.errstr = "misaligned access",
+		.errstr = "misaligned context access",
 		.result = REJECT,
+		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
 	},
 	{
 		"check __sk_buff->hash, offset 0, half store not permitted",
@@ -1320,8 +1574,9 @@ static struct bpf_test tests[] = {
 				    offsetof(struct __sk_buff, cb[0]) + 2),
 			BPF_EXIT_INSN(),
 		},
-		.errstr = "misaligned access",
+		.errstr = "misaligned context access",
 		.result = REJECT,
+		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
 	},
 	{
 		"check cb access: word, unaligned 2",
@@ -1331,8 +1586,9 @@ static struct bpf_test tests[] = {
 				    offsetof(struct __sk_buff, cb[4]) + 1),
 			BPF_EXIT_INSN(),
 		},
-		.errstr = "misaligned access",
+		.errstr = "misaligned context access",
 		.result = REJECT,
+		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
 	},
 	{
 		"check cb access: word, unaligned 3",
@@ -1342,8 +1598,9 @@ static struct bpf_test tests[] = {
 				    offsetof(struct __sk_buff, cb[4]) + 2),
 			BPF_EXIT_INSN(),
 		},
-		.errstr = "misaligned access",
+		.errstr = "misaligned context access",
 		.result = REJECT,
+		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
 	},
 	{
 		"check cb access: word, unaligned 4",
@@ -1353,8 +1610,9 @@ static struct bpf_test tests[] = {
 				    offsetof(struct __sk_buff, cb[4]) + 3),
 			BPF_EXIT_INSN(),
 		},
-		.errstr = "misaligned access",
+		.errstr = "misaligned context access",
 		.result = REJECT,
+		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
 	},
 	{
 		"check cb access: double",
@@ -1380,8 +1638,9 @@ static struct bpf_test tests[] = {
 				    offsetof(struct __sk_buff, cb[1])),
 			BPF_EXIT_INSN(),
 		},
-		.errstr = "misaligned access",
+		.errstr = "misaligned context access",
 		.result = REJECT,
+		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
 	},
 	{
 		"check cb access: double, unaligned 2",
@@ -1391,8 +1650,9 @@ static struct bpf_test tests[] = {
 				    offsetof(struct __sk_buff, cb[3])),
 			BPF_EXIT_INSN(),
 		},
-		.errstr = "misaligned access",
+		.errstr = "misaligned context access",
 		.result = REJECT,
+		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
 	},
 	{
 		"check cb access: double, oob 1",
@@ -1524,7 +1784,8 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.result = REJECT,
-		.errstr = "misaligned access off -6 size 8",
+		.errstr = "misaligned stack access off (0x0; 0x0)+-8+2 size 8",
+		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
 	},
 	{
 		"PTR_TO_STACK store/load - bad alignment on reg",
@@ -1536,7 +1797,8 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.result = REJECT,
-		.errstr = "misaligned access off -2 size 8",
+		.errstr = "misaligned stack access off (0x0; 0x0)+-10+8 size 8",
+		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
 	},
 	{
 		"PTR_TO_STACK store/load - out of bounds low",
@@ -1580,8 +1842,6 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.result = ACCEPT,
-		.result_unpriv = REJECT,
-		.errstr_unpriv = "R1 pointer arithmetic",
 	},
 	{
 		"unpriv: add pointer to pointer",
@@ -1592,7 +1852,7 @@ static struct bpf_test tests[] = {
 		},
 		.result = ACCEPT,
 		.result_unpriv = REJECT,
-		.errstr_unpriv = "R1 pointer arithmetic",
+		.errstr_unpriv = "R1 pointer += pointer",
 	},
 	{
 		"unpriv: neg pointer",
@@ -1933,10 +2193,7 @@ static struct bpf_test tests[] = {
 			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8),
 			BPF_EXIT_INSN(),
 		},
-		.errstr_unpriv = "pointer arithmetic prohibited",
-		.result_unpriv = REJECT,
-		.errstr = "R1 invalid mem access",
-		.result = REJECT,
+		.result = ACCEPT,
 	},
 	{
 		"unpriv: cmp of stack pointer",
@@ -2000,7 +2257,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.result = REJECT,
-		.errstr = "invalid stack type R3",
+		.errstr = "R4 min value is negative",
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
 	{
@@ -2017,7 +2274,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.result = REJECT,
-		.errstr = "invalid stack type R3",
+		.errstr = "R4 min value is negative",
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
 	{
@@ -2219,7 +2476,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.result = REJECT,
-		.errstr = "invalid stack type R3 off=-1 access_size=-1",
+		.errstr = "R4 min value is negative",
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
 	{
@@ -2236,7 +2493,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.result = REJECT,
-		.errstr = "invalid stack type R3 off=-1 access_size=2147483647",
+		.errstr = "R4 unbounded memory access, use 'var &= const' or 'if (var < const)'",
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
 	{
@@ -2253,7 +2510,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.result = REJECT,
-		.errstr = "invalid stack type R3 off=-512 access_size=2147483647",
+		.errstr = "R4 unbounded memory access, use 'var &= const' or 'if (var < const)'",
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
 	{
@@ -2324,8 +2581,8 @@ static struct bpf_test tests[] = {
 				    offsetof(struct __sk_buff, data)),
 			BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_4),
 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
-			BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 48),
-			BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 48),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 49),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 49),
 			BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_3),
 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8),
@@ -2653,7 +2910,7 @@ static struct bpf_test tests[] = {
 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
 			BPF_JMP_A(-6),
 		},
-		.errstr = "misaligned packet access off 2+15+-4 size 4",
+		.errstr = "misaligned packet access off 2+(0x0; 0x0)+15+-4 size 4",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
@@ -2704,11 +2961,11 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0xffffffff),
 			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
 			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
-			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffff),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0x7fff),
 			BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
 			BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
 			BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
-			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xffff - 1),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0x7fff - 1),
 			BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
 			BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0),
 			BPF_MOV64_IMM(BPF_REG_0, 0),
@@ -2730,10 +2987,10 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_4, 0xffffffff),
 			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -8),
 			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
-			BPF_ALU64_IMM(BPF_AND, BPF_REG_4, 0xffff),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_4, 0x7fff),
 			BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
 			BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
-			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xffff - 1),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0x7fff - 1),
 			BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
 			BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0),
 			BPF_MOV64_IMM(BPF_REG_0, 0),
@@ -2759,7 +3016,7 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_4, 0xffffffff),
 			BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_4, -8),
 			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
-			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 48),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 49),
 			BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
 			BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2),
@@ -2796,7 +3053,7 @@ static struct bpf_test tests[] = {
 		},
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.result = REJECT,
-		.errstr = "cannot add integer value with 47 upper zero bits to ptr_to_packet",
+		.errstr = "invalid access to packet, off=0 size=8, R5(id=1,off=0,r=0)",
 	},
 	{
 		"direct packet access: test24 (x += pkt_ptr, 5)",
@@ -2814,7 +3071,7 @@ static struct bpf_test tests[] = {
 			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4),
 			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
 			BPF_MOV64_REG(BPF_REG_5, BPF_REG_0),
-			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0xffff - 1),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x7fff - 1),
 			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
 			BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_0, 0),
 			BPF_MOV64_IMM(BPF_REG_0, 0),
@@ -2824,6 +3081,79 @@ static struct bpf_test tests[] = {
 		.result = ACCEPT,
 	},
 	{
+		"direct packet access: test25 (marking on <, good access)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_3, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, -4),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"direct packet access: test26 (marking on <, bad access)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_3, 3),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JA, 0, 0, -3),
+		},
+		.result = REJECT,
+		.errstr = "invalid access to packet",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"direct packet access: test27 (marking on <=, good access)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_0, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"direct packet access: test28 (marking on <=, bad access)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, -4),
+		},
+		.result = REJECT,
+		.errstr = "invalid access to packet",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
 		"helper access to packet: test1, valid packet_ptr range",
 		.insns = {
 			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
@@ -3113,7 +3443,7 @@ static struct bpf_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
 	{
-		"helper access to packet: test14, cls helper fail sub",
+		"helper access to packet: test14, cls helper ok sub",
 		.insns = {
 			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
 				    offsetof(struct __sk_buff, data)),
@@ -3133,12 +3463,36 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to packet: test15, cls helper fail sub",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+			BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 12),
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_csum_diff),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
 		.result = REJECT,
-		.errstr = "type=inv expected=fp",
+		.errstr = "invalid access to packet",
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
 	{
-		"helper access to packet: test15, cls helper fail range 1",
+		"helper access to packet: test16, cls helper fail range 1",
 		.insns = {
 			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
 				    offsetof(struct __sk_buff, data)),
@@ -3163,7 +3517,7 @@ static struct bpf_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
 	{
-		"helper access to packet: test16, cls helper fail range 2",
+		"helper access to packet: test17, cls helper fail range 2",
 		.insns = {
 			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
 				    offsetof(struct __sk_buff, data)),
@@ -3184,11 +3538,11 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.result = REJECT,
-		.errstr = "invalid access to packet",
+		.errstr = "R2 min value is negative",
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
 	{
-		"helper access to packet: test17, cls helper fail range 3",
+		"helper access to packet: test18, cls helper fail range 3",
 		.insns = {
 			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
 				    offsetof(struct __sk_buff, data)),
@@ -3209,11 +3563,11 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.result = REJECT,
-		.errstr = "invalid access to packet",
+		.errstr = "R2 min value is negative",
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
 	{
-		"helper access to packet: test18, cls helper fail range zero",
+		"helper access to packet: test19, cls helper fail range zero",
 		.insns = {
 			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
 				    offsetof(struct __sk_buff, data)),
@@ -3238,7 +3592,7 @@ static struct bpf_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
 	{
-		"helper access to packet: test19, pkt end as input",
+		"helper access to packet: test20, pkt end as input",
 		.insns = {
 			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
 				    offsetof(struct __sk_buff, data)),
@@ -3263,7 +3617,7 @@ static struct bpf_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
 	{
-		"helper access to packet: test20, wrong reg",
+		"helper access to packet: test21, wrong reg",
 		.insns = {
 			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
 				    offsetof(struct __sk_buff, data)),
@@ -3323,7 +3677,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map2 = { 3 },
-		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr_unpriv = "R0 leaks addr",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
 		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -3347,7 +3701,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map2 = { 3 },
-		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr_unpriv = "R0 leaks addr",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
 		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -3375,7 +3729,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map2 = { 3 },
-		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr_unpriv = "R0 leaks addr",
 		.result_unpriv = REJECT,
 		.result = ACCEPT,
 		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -3416,9 +3770,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map2 = { 3 },
-		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 		.errstr = "R0 min value is outside of the array range",
-		.result_unpriv = REJECT,
 		.result = REJECT,
 		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 	},
@@ -3440,9 +3792,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map2 = { 3 },
-		.errstr_unpriv = "R0 pointer arithmetic prohibited",
-		.errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
-		.result_unpriv = REJECT,
+		.errstr = "R0 unbounded memory access, make sure to bounds check any array access into a map",
 		.result = REJECT,
 		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 	},
@@ -3456,7 +3806,7 @@ static struct bpf_test tests[] = {
 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 				     BPF_FUNC_map_lookup_elem),
 			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
-			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
 			BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES),
 			BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1),
 			BPF_MOV32_IMM(BPF_REG_1, 0),
@@ -3467,8 +3817,8 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map2 = { 3 },
-		.errstr_unpriv = "R0 pointer arithmetic prohibited",
-		.errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
+		.errstr_unpriv = "R0 leaks addr",
+		.errstr = "R0 unbounded memory access",
 		.result_unpriv = REJECT,
 		.result = REJECT,
 		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -3494,7 +3844,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map2 = { 3 },
-		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr_unpriv = "R0 leaks addr",
 		.errstr = "invalid access to map value, value_size=48 off=44 size=8",
 		.result_unpriv = REJECT,
 		.result = REJECT,
@@ -3524,8 +3874,8 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map2 = { 3, 11 },
-		.errstr_unpriv = "R0 pointer arithmetic prohibited",
-		.errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
+		.errstr_unpriv = "R0 pointer += pointer",
+		.errstr = "R0 invalid mem access 'inv'",
 		.result_unpriv = REJECT,
 		.result = REJECT,
 		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -3667,34 +4017,6 @@ static struct bpf_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS
 	},
 	{
-		"multiple registers share map_lookup_elem bad reg type",
-		.insns = {
-			BPF_MOV64_IMM(BPF_REG_1, 10),
-			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
-			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
-			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
-			BPF_LD_MAP_FD(BPF_REG_1, 0),
-			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
-				     BPF_FUNC_map_lookup_elem),
-			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
-			BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
-			BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
-			BPF_MOV64_REG(BPF_REG_5, BPF_REG_0),
-			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
-			BPF_MOV64_IMM(BPF_REG_1, 1),
-			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
-			BPF_MOV64_IMM(BPF_REG_1, 2),
-			BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0, 1),
-			BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 0),
-			BPF_MOV64_IMM(BPF_REG_1, 3),
-			BPF_EXIT_INSN(),
-		},
-		.fixup_map1 = { 4 },
-		.result = REJECT,
-		.errstr = "R3 invalid mem access 'inv'",
-		.prog_type = BPF_PROG_TYPE_SCHED_CLS
-	},
-	{
 		"invalid map access from else condition",
 		.insns = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
@@ -3712,9 +4034,9 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map2 = { 3 },
-		.errstr = "R0 unbounded memory access, make sure to bounds check any array access into a map",
+		.errstr = "R0 unbounded memory access",
 		.result = REJECT,
-		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr_unpriv = "R0 leaks addr",
 		.result_unpriv = REJECT,
 		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 	},
@@ -4092,7 +4414,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map2 = { 3 },
-		.errstr = "invalid access to map value, value_size=48 off=0 size=-8",
+		.errstr = "R2 min value is negative",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -4158,7 +4480,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map2 = { 3 },
-		.errstr = "R1 min value is outside of the array range",
+		.errstr = "invalid access to map value, value_size=48 off=4 size=0",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -4204,7 +4526,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map2 = { 3 },
-		.errstr = "invalid access to map value, value_size=48 off=4 size=-8",
+		.errstr = "R2 min value is negative",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -4226,7 +4548,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map2 = { 3 },
-		.errstr = "R1 min value is outside of the array range",
+		.errstr = "R2 min value is negative",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -4342,7 +4664,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map2 = { 3 },
-		.errstr = "invalid access to map value, value_size=48 off=4 size=-8",
+		.errstr = "R2 min value is negative",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -4365,7 +4687,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map2 = { 3 },
-		.errstr = "R1 min value is outside of the array range",
+		.errstr = "R2 min value is negative",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -4453,13 +4775,13 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
 			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
-			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_2, 1),
 			BPF_MOV64_IMM(BPF_REG_3, 0),
 			BPF_EMIT_CALL(BPF_FUNC_probe_read),
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map2 = { 3 },
-		.errstr = "R1 min value is negative, either use unsigned index or do a if (index >=0) check",
+		.errstr = "R1 unbounded memory access",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -4490,6 +4812,246 @@ static struct bpf_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
 	{
+		"helper access to map: bounds check using <, good access",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JLT, BPF_REG_3, 32, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to map: bounds check using <, bad access",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JLT, BPF_REG_3, 32, 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = REJECT,
+		.errstr = "R1 unbounded memory access",
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to map: bounds check using <=, good access",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JLE, BPF_REG_3, 32, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to map: bounds check using <=, bad access",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JLE, BPF_REG_3, 32, 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = REJECT,
+		.errstr = "R1 unbounded memory access",
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to map: bounds check using s<, good access",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 32, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 0, -3),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to map: bounds check using s<, good access 2",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 32, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, -3, -3),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to map: bounds check using s<, bad access",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 32, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, -3, -3),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = REJECT,
+		.errstr = "R1 min value is negative",
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to map: bounds check using s<=, good access",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 32, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 0, -3),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to map: bounds check using s<=, good access 2",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 32, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, -3, -3),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to map: bounds check using s<=, bad access",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 32, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, -3, -3),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = REJECT,
+		.errstr = "R1 min value is negative",
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
 		"map element value is preserved across register spilling",
 		.insns = {
 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
@@ -4579,7 +5141,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map2 = { 3 },
-		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr_unpriv = "R0 leaks addr",
 		.result = ACCEPT,
 		.result_unpriv = REJECT,
 		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -4607,7 +5169,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map2 = { 3 },
-		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr_unpriv = "R0 leaks addr",
 		.result = ACCEPT,
 		.result_unpriv = REJECT,
 		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -4626,7 +5188,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map2 = { 3 },
-		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr_unpriv = "R0 bitwise operator &= on pointer",
 		.errstr = "invalid mem access 'inv'",
 		.result = REJECT,
 		.result_unpriv = REJECT,
@@ -4645,7 +5207,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map2 = { 3 },
-		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr_unpriv = "R0 32-bit pointer arithmetic prohibited",
 		.errstr = "invalid mem access 'inv'",
 		.result = REJECT,
 		.result_unpriv = REJECT,
@@ -4664,7 +5226,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map2 = { 3 },
-		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr_unpriv = "R0 pointer arithmetic with /= operator",
 		.errstr = "invalid mem access 'inv'",
 		.result = REJECT,
 		.result_unpriv = REJECT,
@@ -4707,10 +5269,8 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map2 = { 3 },
-		.errstr_unpriv = "R0 invalid mem access 'inv'",
 		.errstr = "R0 invalid mem access 'inv'",
 		.result = REJECT,
-		.result_unpriv = REJECT,
 	},
 	{
 		"map element value is preserved across register spilling",
@@ -4732,7 +5292,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map2 = { 3 },
-		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr_unpriv = "R0 leaks addr",
 		.result = ACCEPT,
 		.result_unpriv = REJECT,
 		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -4914,7 +5474,8 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.errstr = "R2 unbounded memory access",
+		/* because max wasn't checked, signed min is negative */
+		.errstr = "R2 min value is negative, either use unsigned or 'var &= const'",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
@@ -5063,6 +5624,20 @@ static struct bpf_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
 	{
+		"helper access to variable memory: size = 0 allowed on NULL",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, 0),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_EMIT_CALL(BPF_FUNC_csum_diff),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
 		"helper access to variable memory: size > 0 not allowed on NULL",
 		.insns = {
 			BPF_MOV64_IMM(BPF_REG_1, 0),
@@ -5076,7 +5651,7 @@ static struct bpf_test tests[] = {
 			BPF_EMIT_CALL(BPF_FUNC_csum_diff),
 			BPF_EXIT_INSN(),
 		},
-		.errstr = "R1 type=imm expected=fp",
+		.errstr = "R1 type=inv expected=fp",
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
@@ -5161,7 +5736,7 @@ static struct bpf_test tests[] = {
 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 				     BPF_FUNC_map_lookup_elem),
 			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
-			BPF_MOV64_IMM(BPF_REG_1, 6),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
 			BPF_ALU64_IMM(BPF_AND, BPF_REG_1, -4),
 			BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
 			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
@@ -5170,10 +5745,8 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map2 = { 3 },
-		.errstr_unpriv = "R0 pointer arithmetic prohibited",
-		.errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
+		.errstr = "R0 max value is outside of the array range",
 		.result = REJECT,
-		.result_unpriv = REJECT,
 		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 	},
 	{
@@ -5202,10 +5775,8 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map2 = { 3 },
-		.errstr_unpriv = "R0 pointer arithmetic prohibited",
-		.errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
+		.errstr = "R0 max value is outside of the array range",
 		.result = REJECT,
-		.result_unpriv = REJECT,
 		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 	},
 	{
@@ -5252,7 +5823,7 @@ static struct bpf_test tests[] = {
 		},
 		.fixup_map_in_map = { 3 },
 		.errstr = "R1 type=inv expected=map_ptr",
-		.errstr_unpriv = "R1 pointer arithmetic prohibited",
+		.errstr_unpriv = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited",
 		.result = REJECT,
 	},
 	{
@@ -5532,10 +6103,8 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map1 = { 3 },
-		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 		.errstr = "R0 min value is negative",
 		.result = REJECT,
-		.result_unpriv = REJECT,
 	},
 	{
 		"bounds checks mixing signed and unsigned",
@@ -5558,10 +6127,8 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map1 = { 3 },
-		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 		.errstr = "R0 min value is negative",
 		.result = REJECT,
-		.result_unpriv = REJECT,
 	},
 	{
 		"bounds checks mixing signed and unsigned, variant 2",
@@ -5586,10 +6153,8 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map1 = { 3 },
-		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 		.errstr = "R8 invalid mem access 'inv'",
 		.result = REJECT,
-		.result_unpriv = REJECT,
 	},
 	{
 		"bounds checks mixing signed and unsigned, variant 3",
@@ -5613,10 +6178,8 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map1 = { 3 },
-		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 		.errstr = "R8 invalid mem access 'inv'",
 		.result = REJECT,
-		.result_unpriv = REJECT,
 	},
 	{
 		"bounds checks mixing signed and unsigned, variant 4",
@@ -5639,10 +6202,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map1 = { 3 },
-		.errstr_unpriv = "R0 pointer arithmetic prohibited",
-		.errstr = "R0 min value is negative",
-		.result = REJECT,
-		.result_unpriv = REJECT,
+		.result = ACCEPT,
 	},
 	{
 		"bounds checks mixing signed and unsigned, variant 5",
@@ -5666,10 +6226,8 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map1 = { 3 },
-		.errstr_unpriv = "R0 pointer arithmetic prohibited",
-		.errstr = "R0 invalid mem access",
+		.errstr = "R0 min value is negative",
 		.result = REJECT,
-		.result_unpriv = REJECT,
 	},
 	{
 		"bounds checks mixing signed and unsigned, variant 6",
@@ -5690,10 +6248,8 @@ static struct bpf_test tests[] = {
 			BPF_MOV64_IMM(BPF_REG_0, 0),
 			BPF_EXIT_INSN(),
 		},
-		.errstr_unpriv = "R4 min value is negative, either use unsigned",
 		.errstr = "R4 min value is negative, either use unsigned",
 		.result = REJECT,
-		.result_unpriv = REJECT,
 	},
 	{
 		"bounds checks mixing signed and unsigned, variant 7",
@@ -5716,10 +6272,7 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map1 = { 3 },
-		.errstr_unpriv = "R0 pointer arithmetic prohibited",
-		.errstr = "R0 min value is negative",
-		.result = REJECT,
-		.result_unpriv = REJECT,
+		.result = ACCEPT,
 	},
 	{
 		"bounds checks mixing signed and unsigned, variant 8",
@@ -5730,32 +6283,6 @@ static struct bpf_test tests[] = {
 			BPF_LD_MAP_FD(BPF_REG_1, 0),
 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 				     BPF_FUNC_map_lookup_elem),
-			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
-			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
-			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
-			BPF_MOV64_IMM(BPF_REG_2, 1024 * 1024 * 1024 + 1),
-			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 3),
-			BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
-			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
-			BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
-			BPF_MOV64_IMM(BPF_REG_0, 0),
-			BPF_EXIT_INSN(),
-		},
-		.fixup_map1 = { 3 },
-		.errstr_unpriv = "R0 pointer arithmetic prohibited",
-		.errstr = "R0 min value is negative",
-		.result = REJECT,
-		.result_unpriv = REJECT,
-	},
-	{
-		"bounds checks mixing signed and unsigned, variant 9",
-		.insns = {
-			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
-			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
-			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
-			BPF_LD_MAP_FD(BPF_REG_1, 0),
-			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
-				     BPF_FUNC_map_lookup_elem),
 			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
 			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
 			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
@@ -5770,13 +6297,11 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map1 = { 3 },
-		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 		.errstr = "R0 min value is negative",
 		.result = REJECT,
-		.result_unpriv = REJECT,
 	},
 	{
-		"bounds checks mixing signed and unsigned, variant 10",
+		"bounds checks mixing signed and unsigned, variant 9",
 		.insns = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
@@ -5798,13 +6323,10 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map1 = { 3 },
-		.errstr_unpriv = "R0 pointer arithmetic prohibited",
-		.errstr = "R0 min value is negative",
-		.result = REJECT,
-		.result_unpriv = REJECT,
+		.result = ACCEPT,
 	},
 	{
-		"bounds checks mixing signed and unsigned, variant 11",
+		"bounds checks mixing signed and unsigned, variant 10",
 		.insns = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
@@ -5826,13 +6348,11 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map1 = { 3 },
-		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 		.errstr = "R0 min value is negative",
 		.result = REJECT,
-		.result_unpriv = REJECT,
 	},
 	{
-		"bounds checks mixing signed and unsigned, variant 12",
+		"bounds checks mixing signed and unsigned, variant 11",
 		.insns = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
@@ -5855,13 +6375,11 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map1 = { 3 },
-		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 		.errstr = "R0 min value is negative",
 		.result = REJECT,
-		.result_unpriv = REJECT,
 	},
 	{
-		"bounds checks mixing signed and unsigned, variant 13",
+		"bounds checks mixing signed and unsigned, variant 12",
 		.insns = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
@@ -5883,13 +6401,11 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map1 = { 3 },
-		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 		.errstr = "R0 min value is negative",
 		.result = REJECT,
-		.result_unpriv = REJECT,
 	},
 	{
-		"bounds checks mixing signed and unsigned, variant 14",
+		"bounds checks mixing signed and unsigned, variant 13",
 		.insns = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
@@ -5914,13 +6430,11 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map1 = { 3 },
-		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 		.errstr = "R0 min value is negative",
 		.result = REJECT,
-		.result_unpriv = REJECT,
 	},
 	{
-		"bounds checks mixing signed and unsigned, variant 15",
+		"bounds checks mixing signed and unsigned, variant 14",
 		.insns = {
 			BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
 				    offsetof(struct __sk_buff, mark)),
@@ -5946,13 +6460,11 @@ static struct bpf_test tests[] = {
 			BPF_JMP_IMM(BPF_JA, 0, 0, -7),
 		},
 		.fixup_map1 = { 4 },
-		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 		.errstr = "R0 min value is negative",
 		.result = REJECT,
-		.result_unpriv = REJECT,
 	},
 	{
-		"bounds checks mixing signed and unsigned, variant 16",
+		"bounds checks mixing signed and unsigned, variant 15",
 		.insns = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
@@ -5976,13 +6488,13 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map1 = { 3 },
-		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr_unpriv = "R0 pointer comparison prohibited",
 		.errstr = "R0 min value is negative",
 		.result = REJECT,
 		.result_unpriv = REJECT,
 	},
 	{
-		"subtraction bounds (map value)",
+		"subtraction bounds (map value) variant 1",
 		.insns = {
 			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
@@ -6004,10 +6516,134 @@ static struct bpf_test tests[] = {
 			BPF_EXIT_INSN(),
 		},
 		.fixup_map1 = { 3 },
-		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr = "R0 max value is outside of the array range",
+		.result = REJECT,
+	},
+	{
+		"subtraction bounds (map value) variant 2",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 0xff, 6),
+			BPF_LDX_MEM(BPF_B, BPF_REG_3, BPF_REG_0, 1),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_3, 0xff, 4),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_3),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
 		.errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
 		.result = REJECT,
+	},
+	{
+		"variable-offset ctx access",
+		.insns = {
+			/* Get an unknown value */
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+			/* Make it small and 4-byte aligned */
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+			/* add it to skb.  We now have either &skb->len or
+			 * &skb->pkt_type, but we don't know which
+			 */
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
+			/* dereference it */
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "variable ctx access var_off=(0x0; 0x4)",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_LWT_IN,
+	},
+	{
+		"variable-offset stack access",
+		.insns = {
+			/* Fill the top 8 bytes of the stack */
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			/* Get an unknown value */
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+			/* Make it small and 4-byte aligned */
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+			BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8),
+			/* add it to fp.  We now have either fp-4 or fp-8, but
+			 * we don't know which
+			 */
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+			/* dereference it */
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "variable stack access var_off=(0xfffffffffffffff8; 0x4)",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_LWT_IN,
+	},
+	{
+		"liveness pruning and write screening",
+		.insns = {
+			/* Get an unknown value */
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+			/* branch conditions teach us nothing about R2 */
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R0 !read_ok",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_LWT_IN,
+	},
+	{
+		"varlen_map_value_access pruning",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES),
+			BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1),
+			BPF_MOV32_IMM(BPF_REG_1, 0),
+			BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+				   offsetof(struct test_val, foo)),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr_unpriv = "R0 leaks addr",
+		.errstr = "R0 unbounded memory access",
 		.result_unpriv = REJECT,
+		.result = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"invalid 64-bit BPF_END",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_0, 0),
+			{
+				.code  = BPF_ALU64 | BPF_END | BPF_TO_LE,
+				.dst_reg = BPF_REG_0,
+				.src_reg = 0,
+				.off   = 0,
+				.imm   = 32,
+			},
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "BPF_END uses reserved fields",
+		.result = REJECT,
 	},
 };
 
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.c b/tools/testing/selftests/bpf/test_xdp_redirect.c
new file mode 100644
index 000000000000..ef9e704be140
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_redirect.c
@@ -0,0 +1,28 @@
+/* Copyright (c) 2017 VMware
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+SEC("redirect_to_111")
+int xdp_redirect_to_111(struct xdp_md *xdp)
+{
+	return bpf_redirect(111, 0);
+}
+SEC("redirect_to_222")
+int xdp_redirect_to_222(struct xdp_md *xdp)
+{
+	return bpf_redirect(222, 0);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh
new file mode 100755
index 000000000000..344a3656dea6
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_redirect.sh
@@ -0,0 +1,59 @@
+#!/bin/sh
+# Create 2 namespaces with two veth peers, and
+# forward packets in-between using generic XDP
+#
+# NS1(veth11)     NS2(veth22)
+#     |               |
+#     |               |
+#   (veth1, ------ (veth2,
+#   id:111)         id:222)
+#     | xdp forwarding |
+#     ------------------
+
+cleanup()
+{
+	if [ "$?" = "0" ]; then
+		echo "selftests: test_xdp_redirect [PASS]";
+	else
+		echo "selftests: test_xdp_redirect [FAILED]";
+	fi
+
+	set +e
+	ip netns del ns1 2> /dev/null
+	ip netns del ns2 2> /dev/null
+}
+
+ip link set dev lo xdpgeneric off 2>/dev/null > /dev/null
+if [ $? -ne 0 ];then
+	echo "selftests: [SKIP] Could not run test without the ip xdpgeneric support"
+	exit 0
+fi
+set -e
+
+ip netns add ns1
+ip netns add ns2
+
+trap cleanup 0 2 3 6 9
+
+ip link add veth1 index 111 type veth peer name veth11
+ip link add veth2 index 222 type veth peer name veth22
+
+ip link set veth11 netns ns1
+ip link set veth22 netns ns2
+
+ip link set veth1 up
+ip link set veth2 up
+
+ip netns exec ns1 ip addr add 10.1.1.11/24 dev veth11
+ip netns exec ns2 ip addr add 10.1.1.22/24 dev veth22
+
+ip netns exec ns1 ip link set dev veth11 up
+ip netns exec ns2 ip link set dev veth22 up
+
+ip link set dev veth1 xdpgeneric obj test_xdp_redirect.o sec redirect_to_222
+ip link set dev veth2 xdpgeneric obj test_xdp_redirect.o sec redirect_to_111
+
+ip netns exec ns1 ping -c 1 10.1.1.22
+ip netns exec ns2 ping -c 1 10.1.1.11
+
+exit 0
diff --git a/tools/testing/selftests/breakpoints/Makefile b/tools/testing/selftests/breakpoints/Makefile
index 6b214b7b10fb..247b0a1899d7 100644
--- a/tools/testing/selftests/breakpoints/Makefile
+++ b/tools/testing/selftests/breakpoints/Makefile
@@ -2,14 +2,14 @@
 uname_M := $(shell uname -m 2>/dev/null || echo not)
 ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
 
+TEST_GEN_PROGS := step_after_suspend_test
+
 ifeq ($(ARCH),x86)
-TEST_GEN_PROGS := breakpoint_test
+TEST_GEN_PROGS += breakpoint_test
 endif
 ifneq (,$(filter $(ARCH),aarch64 arm64))
-TEST_GEN_PROGS := breakpoint_test_arm64
+TEST_GEN_PROGS += breakpoint_test_arm64
 endif
 
-TEST_GEN_PROGS += step_after_suspend_test
-
 include ../lib.mk
 
diff --git a/tools/testing/selftests/breakpoints/breakpoint_test.c b/tools/testing/selftests/breakpoints/breakpoint_test.c
index f63356151ad4..901b85ea6a59 100644
--- a/tools/testing/selftests/breakpoints/breakpoint_test.c
+++ b/tools/testing/selftests/breakpoints/breakpoint_test.c
@@ -367,11 +367,11 @@ static void launch_tests(void)
 
 	/* Icebp traps */
 	ptrace(PTRACE_CONT, child_pid, NULL, 0);
-	check_success("Test icebp");
+	check_success("Test icebp\n");
 
 	/* Int 3 traps */
 	ptrace(PTRACE_CONT, child_pid, NULL, 0);
-	check_success("Test int 3 trap");
+	check_success("Test int 3 trap\n");
 
 	ptrace(PTRACE_CONT, child_pid, NULL, 0);
 }
diff --git a/tools/testing/selftests/capabilities/test_execve.c b/tools/testing/selftests/capabilities/test_execve.c
index 763f37fecfb8..cf6778441381 100644
--- a/tools/testing/selftests/capabilities/test_execve.c
+++ b/tools/testing/selftests/capabilities/test_execve.c
@@ -1,7 +1,6 @@
 #define _GNU_SOURCE
 
 #include <cap-ng.h>
-#include <err.h>
 #include <linux/capability.h>
 #include <stdbool.h>
 #include <string.h>
@@ -18,6 +17,8 @@
 #include <sys/prctl.h>
 #include <sys/stat.h>
 
+#include "../kselftest.h"
+
 #ifndef PR_CAP_AMBIENT
 #define PR_CAP_AMBIENT			47
 # define PR_CAP_AMBIENT_IS_SET		1
@@ -27,6 +28,7 @@
 #endif
 
 static int nerrs;
+static pid_t mpid;	/*  main() pid is used to avoid duplicate test counts */
 
 static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, va_list ap)
 {
@@ -36,29 +38,32 @@ static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, va_list
 	int buf_len;
 
 	buf_len = vsnprintf(buf, sizeof(buf), fmt, ap);
-	if (buf_len < 0) {
-		err(1, "vsnprintf failed");
-	}
-	if (buf_len >= sizeof(buf)) {
-		errx(1, "vsnprintf output truncated");
-	}
+	if (buf_len < 0)
+		ksft_exit_fail_msg("vsnprintf failed - %s\n", strerror(errno));
+
+	if (buf_len >= sizeof(buf))
+		ksft_exit_fail_msg("vsnprintf output truncated\n");
+
 
 	fd = open(filename, O_WRONLY);
 	if (fd < 0) {
 		if ((errno == ENOENT) && enoent_ok)
 			return;
-		err(1, "open of %s failed", filename);
+		ksft_exit_fail_msg("open of %s failed - %s\n",
+					filename, strerror(errno));
 	}
 	written = write(fd, buf, buf_len);
 	if (written != buf_len) {
 		if (written >= 0) {
-			errx(1, "short write to %s", filename);
+			ksft_exit_fail_msg("short write to %s\n", filename);
 		} else {
-			err(1, "write to %s failed", filename);
+			ksft_exit_fail_msg("write to %s failed - %s\n",
+						filename, strerror(errno));
 		}
 	}
 	if (close(fd) != 0) {
-		err(1, "close of %s failed", filename);
+		ksft_exit_fail_msg("close of %s failed - %s\n",
+					filename, strerror(errno));
 	}
 }
 
@@ -95,11 +100,12 @@ static bool create_and_enter_ns(uid_t inner_uid)
 	 */
 
 	if (unshare(CLONE_NEWNS) == 0) {
-		printf("[NOTE]\tUsing global UIDs for tests\n");
+		ksft_print_msg("[NOTE]\tUsing global UIDs for tests\n");
 		if (prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0) != 0)
-			err(1, "PR_SET_KEEPCAPS");
+			ksft_exit_fail_msg("PR_SET_KEEPCAPS - %s\n",
+						strerror(errno));
 		if (setresuid(inner_uid, inner_uid, -1) != 0)
-			err(1, "setresuid");
+			ksft_exit_fail_msg("setresuid - %s\n", strerror(errno));
 
 		// Re-enable effective caps
 		capng_get_caps_process();
@@ -107,22 +113,24 @@ static bool create_and_enter_ns(uid_t inner_uid)
 			if (capng_have_capability(CAPNG_PERMITTED, i))
 				capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, i);
 		if (capng_apply(CAPNG_SELECT_CAPS) != 0)
-			err(1, "capng_apply");
+			ksft_exit_fail_msg(
+					"capng_apply - %s\n", strerror(errno));
 
 		have_outer_privilege = true;
 	} else if (unshare(CLONE_NEWUSER | CLONE_NEWNS) == 0) {
-		printf("[NOTE]\tUsing a user namespace for tests\n");
+		ksft_print_msg("[NOTE]\tUsing a user namespace for tests\n");
 		maybe_write_file("/proc/self/setgroups", "deny");
 		write_file("/proc/self/uid_map", "%d %d 1", inner_uid, outer_uid);
 		write_file("/proc/self/gid_map", "0 %d 1", outer_gid);
 
 		have_outer_privilege = false;
 	} else {
-		errx(1, "must be root or be able to create a userns");
+		ksft_exit_skip("must be root or be able to create a userns\n");
 	}
 
 	if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL) != 0)
-		err(1, "remount everything private");
+		ksft_exit_fail_msg("remount everything private - %s\n",
+					strerror(errno));
 
 	return have_outer_privilege;
 }
@@ -131,20 +139,22 @@ static void chdir_to_tmpfs(void)
 {
 	char cwd[PATH_MAX];
 	if (getcwd(cwd, sizeof(cwd)) != cwd)
-		err(1, "getcwd");
+		ksft_exit_fail_msg("getcwd - %s\n", strerror(errno));
 
 	if (mount("private_tmp", ".", "tmpfs", 0, "mode=0777") != 0)
-		err(1, "mount private tmpfs");
+		ksft_exit_fail_msg("mount private tmpfs - %s\n",
+					strerror(errno));
 
 	if (chdir(cwd) != 0)
-		err(1, "chdir to private tmpfs");
+		ksft_exit_fail_msg("chdir to private tmpfs - %s\n",
+					strerror(errno));
 }
 
 static void copy_fromat_to(int fromfd, const char *fromname, const char *toname)
 {
 	int from = openat(fromfd, fromname, O_RDONLY);
 	if (from == -1)
-		err(1, "open copy source");
+		ksft_exit_fail_msg("open copy source - %s\n", strerror(errno));
 
 	int to = open(toname, O_CREAT | O_WRONLY | O_EXCL, 0700);
 
@@ -154,10 +164,11 @@ static void copy_fromat_to(int fromfd, const char *fromname, const char *toname)
 		if (sz == 0)
 			break;
 		if (sz < 0)
-			err(1, "read");
+			ksft_exit_fail_msg("read - %s\n", strerror(errno));
 
 		if (write(to, buf, sz) != sz)
-			err(1, "write");	/* no short writes on tmpfs */
+			/* no short writes on tmpfs */
+			ksft_exit_fail_msg("write - %s\n", strerror(errno));
 	}
 
 	close(from);
@@ -174,18 +185,20 @@ static bool fork_wait(void)
 		int status;
 		if (waitpid(child, &status, 0) != child ||
 		    !WIFEXITED(status)) {
-			printf("[FAIL]\tChild died\n");
+			ksft_print_msg("Child died\n");
 			nerrs++;
 		} else if (WEXITSTATUS(status) != 0) {
-			printf("[FAIL]\tChild failed\n");
+			ksft_print_msg("Child failed\n");
 			nerrs++;
 		} else {
-			printf("[OK]\tChild succeeded\n");
+			/* don't print this message for mpid */
+			if (getpid() != mpid)
+				ksft_test_result_pass("Passed\n");
 		}
-
 		return false;
 	} else {
-		err(1, "fork");
+		ksft_exit_fail_msg("fork - %s\n", strerror(errno));
+		return false;
 	}
 }
 
@@ -195,7 +208,7 @@ static void exec_other_validate_cap(const char *name,
 	execl(name, name, (eff ? "1" : "0"),
 	      (perm ? "1" : "0"), (inh ? "1" : "0"), (ambient ? "1" : "0"),
 	      NULL);
-	err(1, "execl");
+	ksft_exit_fail_msg("execl - %s\n", strerror(errno));
 }
 
 static void exec_validate_cap(bool eff, bool perm, bool inh, bool ambient)
@@ -209,7 +222,8 @@ static int do_tests(int uid, const char *our_path)
 
 	int ourpath_fd = open(our_path, O_RDONLY | O_DIRECTORY);
 	if (ourpath_fd == -1)
-		err(1, "open '%s'", our_path);
+		ksft_exit_fail_msg("open '%s' - %s\n",
+					our_path, strerror(errno));
 
 	chdir_to_tmpfs();
 
@@ -221,30 +235,30 @@ static int do_tests(int uid, const char *our_path)
 		copy_fromat_to(ourpath_fd, "validate_cap",
 			       "validate_cap_suidroot");
 		if (chown("validate_cap_suidroot", 0, -1) != 0)
-			err(1, "chown");
+			ksft_exit_fail_msg("chown - %s\n", strerror(errno));
 		if (chmod("validate_cap_suidroot", S_ISUID | 0700) != 0)
-			err(1, "chmod");
+			ksft_exit_fail_msg("chmod - %s\n", strerror(errno));
 
 		copy_fromat_to(ourpath_fd, "validate_cap",
 			       "validate_cap_suidnonroot");
 		if (chown("validate_cap_suidnonroot", uid + 1, -1) != 0)
-			err(1, "chown");
+			ksft_exit_fail_msg("chown - %s\n", strerror(errno));
 		if (chmod("validate_cap_suidnonroot", S_ISUID | 0700) != 0)
-			err(1, "chmod");
+			ksft_exit_fail_msg("chmod - %s\n", strerror(errno));
 
 		copy_fromat_to(ourpath_fd, "validate_cap",
 			       "validate_cap_sgidroot");
 		if (chown("validate_cap_sgidroot", -1, 0) != 0)
-			err(1, "chown");
+			ksft_exit_fail_msg("chown - %s\n", strerror(errno));
 		if (chmod("validate_cap_sgidroot", S_ISGID | 0710) != 0)
-			err(1, "chmod");
+			ksft_exit_fail_msg("chmod - %s\n", strerror(errno));
 
 		copy_fromat_to(ourpath_fd, "validate_cap",
 			       "validate_cap_sgidnonroot");
 		if (chown("validate_cap_sgidnonroot", -1, gid + 1) != 0)
-			err(1, "chown");
+			ksft_exit_fail_msg("chown - %s\n", strerror(errno));
 		if (chmod("validate_cap_sgidnonroot", S_ISGID | 0710) != 0)
-			err(1, "chmod");
+			ksft_exit_fail_msg("chmod - %s\n", strerror(errno));
 	}
 
 	capng_get_caps_process();
@@ -252,147 +266,162 @@ static int do_tests(int uid, const char *our_path)
 	/* Make sure that i starts out clear */
 	capng_update(CAPNG_DROP, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE);
 	if (capng_apply(CAPNG_SELECT_CAPS) != 0)
-		err(1, "capng_apply");
+		ksft_exit_fail_msg("capng_apply - %s\n", strerror(errno));
 
 	if (uid == 0) {
-		printf("[RUN]\tRoot => ep\n");
+		ksft_print_msg("[RUN]\tRoot => ep\n");
 		if (fork_wait())
 			exec_validate_cap(true, true, false, false);
 	} else {
-		printf("[RUN]\tNon-root => no caps\n");
+		ksft_print_msg("[RUN]\tNon-root => no caps\n");
 		if (fork_wait())
 			exec_validate_cap(false, false, false, false);
 	}
 
-	printf("[OK]\tCheck cap_ambient manipulation rules\n");
+	ksft_print_msg("Check cap_ambient manipulation rules\n");
 
 	/* We should not be able to add ambient caps yet. */
 	if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != -1 || errno != EPERM) {
 		if (errno == EINVAL)
-			printf("[FAIL]\tPR_CAP_AMBIENT_RAISE isn't supported\n");
+			ksft_test_result_fail(
+				"PR_CAP_AMBIENT_RAISE isn't supported\n");
 		else
-			printf("[FAIL]\tPR_CAP_AMBIENT_RAISE should have failed eith EPERM on a non-inheritable cap\n");
+			ksft_test_result_fail(
+				"PR_CAP_AMBIENT_RAISE should have failed eith EPERM on a non-inheritable cap\n");
 		return 1;
 	}
-	printf("[OK]\tPR_CAP_AMBIENT_RAISE failed on non-inheritable cap\n");
+	ksft_test_result_pass(
+		"PR_CAP_AMBIENT_RAISE failed on non-inheritable cap\n");
 
 	capng_update(CAPNG_ADD, CAPNG_INHERITABLE, CAP_NET_RAW);
 	capng_update(CAPNG_DROP, CAPNG_PERMITTED, CAP_NET_RAW);
 	capng_update(CAPNG_DROP, CAPNG_EFFECTIVE, CAP_NET_RAW);
 	if (capng_apply(CAPNG_SELECT_CAPS) != 0)
-		err(1, "capng_apply");
+		ksft_exit_fail_msg("capng_apply - %s\n", strerror(errno));
 	if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_RAW, 0, 0, 0) != -1 || errno != EPERM) {
-		printf("[FAIL]\tPR_CAP_AMBIENT_RAISE should have failed on a non-permitted cap\n");
+		ksft_test_result_fail(
+			"PR_CAP_AMBIENT_RAISE should have failed on a non-permitted cap\n");
 		return 1;
 	}
-	printf("[OK]\tPR_CAP_AMBIENT_RAISE failed on non-permitted cap\n");
+	ksft_test_result_pass(
+		"PR_CAP_AMBIENT_RAISE failed on non-permitted cap\n");
 
 	capng_update(CAPNG_ADD, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE);
 	if (capng_apply(CAPNG_SELECT_CAPS) != 0)
-		err(1, "capng_apply");
+		ksft_exit_fail_msg("capng_apply - %s\n", strerror(errno));
 	if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) {
-		printf("[FAIL]\tPR_CAP_AMBIENT_RAISE should have succeeded\n");
+		ksft_test_result_fail(
+			"PR_CAP_AMBIENT_RAISE should have succeeded\n");
 		return 1;
 	}
-	printf("[OK]\tPR_CAP_AMBIENT_RAISE worked\n");
+	ksft_test_result_pass("PR_CAP_AMBIENT_RAISE worked\n");
 
 	if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != 1) {
-		printf("[FAIL]\tPR_CAP_AMBIENT_IS_SET is broken\n");
+		ksft_test_result_fail("PR_CAP_AMBIENT_IS_SET is broken\n");
 		return 1;
 	}
 
 	if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL, 0, 0, 0, 0) != 0)
-		err(1, "PR_CAP_AMBIENT_CLEAR_ALL");
+		ksft_exit_fail_msg("PR_CAP_AMBIENT_CLEAR_ALL - %s\n",
+					strerror(errno));
 
 	if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) {
-		printf("[FAIL]\tPR_CAP_AMBIENT_CLEAR_ALL didn't work\n");
+		ksft_test_result_fail(
+			"PR_CAP_AMBIENT_CLEAR_ALL didn't work\n");
 		return 1;
 	}
 
 	if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0)
-		err(1, "PR_CAP_AMBIENT_RAISE");
+		ksft_exit_fail_msg("PR_CAP_AMBIENT_RAISE - %s\n",
+					strerror(errno));
 
 	capng_update(CAPNG_DROP, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE);
 	if (capng_apply(CAPNG_SELECT_CAPS) != 0)
-		err(1, "capng_apply");
+		ksft_exit_fail_msg("capng_apply - %s\n", strerror(errno));
 
 	if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) {
-		printf("[FAIL]\tDropping I should have dropped A\n");
+		ksft_test_result_fail("Dropping I should have dropped A\n");
 		return 1;
 	}
 
-	printf("[OK]\tBasic manipulation appears to work\n");
+	ksft_test_result_pass("Basic manipulation appears to work\n");
 
 	capng_update(CAPNG_ADD, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE);
 	if (capng_apply(CAPNG_SELECT_CAPS) != 0)
-		err(1, "capng_apply");
+		ksft_exit_fail_msg("capng_apply - %s\n", strerror(errno));
 	if (uid == 0) {
-		printf("[RUN]\tRoot +i => eip\n");
+		ksft_print_msg("[RUN]\tRoot +i => eip\n");
 		if (fork_wait())
 			exec_validate_cap(true, true, true, false);
 	} else {
-		printf("[RUN]\tNon-root +i => i\n");
+		ksft_print_msg("[RUN]\tNon-root +i => i\n");
 		if (fork_wait())
 			exec_validate_cap(false, false, true, false);
 	}
 
 	if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0)
-		err(1, "PR_CAP_AMBIENT_RAISE");
+		ksft_exit_fail_msg("PR_CAP_AMBIENT_RAISE - %s\n",
+					strerror(errno));
 
-	printf("[RUN]\tUID %d +ia => eipa\n", uid);
+	ksft_print_msg("[RUN]\tUID %d +ia => eipa\n", uid);
 	if (fork_wait())
 		exec_validate_cap(true, true, true, true);
 
 	/* The remaining tests need real privilege */
 
 	if (!have_outer_privilege) {
-		printf("[SKIP]\tSUID/SGID tests (needs privilege)\n");
+		ksft_test_result_skip("SUID/SGID tests (needs privilege)\n");
 		goto done;
 	}
 
 	if (uid == 0) {
-		printf("[RUN]\tRoot +ia, suidroot => eipa\n");
+		ksft_print_msg("[RUN]\tRoot +ia, suidroot => eipa\n");
 		if (fork_wait())
 			exec_other_validate_cap("./validate_cap_suidroot",
 						true, true, true, true);
 
-		printf("[RUN]\tRoot +ia, suidnonroot => ip\n");
+		ksft_print_msg("[RUN]\tRoot +ia, suidnonroot => ip\n");
 		if (fork_wait())
 			exec_other_validate_cap("./validate_cap_suidnonroot",
 						false, true, true, false);
 
-		printf("[RUN]\tRoot +ia, sgidroot => eipa\n");
+		ksft_print_msg("[RUN]\tRoot +ia, sgidroot => eipa\n");
 		if (fork_wait())
 			exec_other_validate_cap("./validate_cap_sgidroot",
 						true, true, true, true);
 
 		if (fork_wait()) {
-			printf("[RUN]\tRoot, gid != 0, +ia, sgidroot => eip\n");
+			ksft_print_msg(
+				"[RUN]\tRoot, gid != 0, +ia, sgidroot => eip\n");
 			if (setresgid(1, 1, 1) != 0)
-				err(1, "setresgid");
+				ksft_exit_fail_msg("setresgid - %s\n",
+							strerror(errno));
 			exec_other_validate_cap("./validate_cap_sgidroot",
 						true, true, true, false);
 		}
 
-		printf("[RUN]\tRoot +ia, sgidnonroot => eip\n");
+		ksft_print_msg("[RUN]\tRoot +ia, sgidnonroot => eip\n");
 		if (fork_wait())
 			exec_other_validate_cap("./validate_cap_sgidnonroot",
 						true, true, true, false);
 	} else {
-		printf("[RUN]\tNon-root +ia, sgidnonroot => i\n");
-		exec_other_validate_cap("./validate_cap_sgidnonroot",
+		ksft_print_msg("[RUN]\tNon-root +ia, sgidnonroot => i\n");
+		if (fork_wait())
+			exec_other_validate_cap("./validate_cap_sgidnonroot",
 					false, false, true, false);
 
 		if (fork_wait()) {
-			printf("[RUN]\tNon-root +ia, sgidroot => i\n");
+			ksft_print_msg("[RUN]\tNon-root +ia, sgidroot => i\n");
 			if (setresgid(1, 1, 1) != 0)
-				err(1, "setresgid");
+				ksft_exit_fail_msg("setresgid - %s\n",
+							strerror(errno));
 			exec_other_validate_cap("./validate_cap_sgidroot",
 						false, false, true, false);
 		}
 	}
 
 done:
+	ksft_print_cnts();
 	return nerrs ? 1 : 0;
 }
 
@@ -400,23 +429,29 @@ int main(int argc, char **argv)
 {
 	char *tmp1, *tmp2, *our_path;
 
+	ksft_print_header();
+
 	/* Find our path */
 	tmp1 = strdup(argv[0]);
 	if (!tmp1)
-		err(1, "strdup");
+		ksft_exit_fail_msg("strdup - %s\n", strerror(errno));
 	tmp2 = dirname(tmp1);
 	our_path = strdup(tmp2);
 	if (!our_path)
-		err(1, "strdup");
+		ksft_exit_fail_msg("strdup - %s\n", strerror(errno));
 	free(tmp1);
 
+	mpid = getpid();
+
 	if (fork_wait()) {
-		printf("[RUN]\t+++ Tests with uid == 0 +++\n");
+		ksft_print_msg("[RUN]\t+++ Tests with uid == 0 +++\n");
 		return do_tests(0, our_path);
 	}
 
+	ksft_print_msg("==================================================\n");
+
 	if (fork_wait()) {
-		printf("[RUN]\t+++ Tests with uid != 0 +++\n");
+		ksft_print_msg("[RUN]\t+++ Tests with uid != 0 +++\n");
 		return do_tests(1, our_path);
 	}
 
diff --git a/tools/testing/selftests/capabilities/validate_cap.c b/tools/testing/selftests/capabilities/validate_cap.c
index dd3c45f7b23c..694cd73d4493 100644
--- a/tools/testing/selftests/capabilities/validate_cap.c
+++ b/tools/testing/selftests/capabilities/validate_cap.c
@@ -1,5 +1,4 @@
 #include <cap-ng.h>
-#include <err.h>
 #include <linux/capability.h>
 #include <stdbool.h>
 #include <string.h>
@@ -7,6 +6,8 @@
 #include <sys/prctl.h>
 #include <sys/auxv.h>
 
+#include "../kselftest.h"
+
 #ifndef PR_CAP_AMBIENT
 #define PR_CAP_AMBIENT			47
 # define PR_CAP_AMBIENT_IS_SET		1
@@ -25,8 +26,10 @@ static bool bool_arg(char **argv, int i)
 		return false;
 	else if (!strcmp(argv[i], "1"))
 		return true;
-	else
-		errx(1, "wrong argv[%d]", i);
+	else {
+		ksft_exit_fail_msg("wrong argv[%d]\n", i);
+		return false;
+	}
 }
 
 int main(int argc, char **argv)
@@ -39,7 +42,7 @@ int main(int argc, char **argv)
 	 */
 
 	if (argc != 5)
-		errx(1, "wrong argc");
+		ksft_exit_fail_msg("wrong argc\n");
 
 #ifdef HAVE_GETAUXVAL
 	if (getauxval(AT_SECURE))
@@ -51,23 +54,26 @@ int main(int argc, char **argv)
 	capng_get_caps_process();
 
 	if (capng_have_capability(CAPNG_EFFECTIVE, CAP_NET_BIND_SERVICE) != bool_arg(argv, 1)) {
-		printf("[FAIL]\tWrong effective state%s\n", atsec);
+		ksft_print_msg("Wrong effective state%s\n", atsec);
 		return 1;
 	}
+
 	if (capng_have_capability(CAPNG_PERMITTED, CAP_NET_BIND_SERVICE) != bool_arg(argv, 2)) {
-		printf("[FAIL]\tWrong permitted state%s\n", atsec);
+		ksft_print_msg("Wrong permitted state%s\n", atsec);
 		return 1;
 	}
+
 	if (capng_have_capability(CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE) != bool_arg(argv, 3)) {
-		printf("[FAIL]\tWrong inheritable state%s\n", atsec);
+		ksft_print_msg("Wrong inheritable state%s\n", atsec);
 		return 1;
 	}
 
 	if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != bool_arg(argv, 4)) {
-		printf("[FAIL]\tWrong ambient state%s\n", atsec);
+		ksft_print_msg("Wrong ambient state%s\n", atsec);
 		return 1;
 	}
 
-	printf("[OK]\tCapabilities after execve were correct\n");
+	ksft_print_msg("%s: Capabilities after execve were correct\n",
+			"validate_cap:");
 	return 0;
 }
diff --git a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
index 98b1d6565f2c..b18b253d7bfb 100755
--- a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
+++ b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
@@ -28,6 +28,12 @@ prerequisite()
 	echo "CPU online/offline summary:"
 	online_cpus=`cat $SYSFS/devices/system/cpu/online`
 	online_max=${online_cpus##*-}
+
+	if [[ "$online_cpus" = "$online_max" ]]; then
+		echo "$msg: since there is only one cpu: $online_cpus"
+		exit 0
+	fi
+
 	echo -e "\t Cpus in online state: $online_cpus"
 
 	offline_cpus=`cat $SYSFS/devices/system/cpu/offline`
@@ -89,8 +95,10 @@ online_cpu_expect_success()
 
 	if ! online_cpu $cpu; then
 		echo $FUNCNAME $cpu: unexpected fail >&2
+		exit 1
 	elif ! cpu_is_online $cpu; then
 		echo $FUNCNAME $cpu: unexpected offline >&2
+		exit 1
 	fi
 }
 
@@ -100,8 +108,10 @@ online_cpu_expect_fail()
 
 	if online_cpu $cpu 2> /dev/null; then
 		echo $FUNCNAME $cpu: unexpected success >&2
+		exit 1
 	elif ! cpu_is_offline $cpu; then
 		echo $FUNCNAME $cpu: unexpected online >&2
+		exit 1
 	fi
 }
 
@@ -111,8 +121,10 @@ offline_cpu_expect_success()
 
 	if ! offline_cpu $cpu; then
 		echo $FUNCNAME $cpu: unexpected fail >&2
+		exit 1
 	elif ! cpu_is_offline $cpu; then
 		echo $FUNCNAME $cpu: unexpected offline >&2
+		exit 1
 	fi
 }
 
@@ -122,8 +134,10 @@ offline_cpu_expect_fail()
 
 	if offline_cpu $cpu 2> /dev/null; then
 		echo $FUNCNAME $cpu: unexpected success >&2
+		exit 1
 	elif ! cpu_is_online $cpu; then
 		echo $FUNCNAME $cpu: unexpected offline >&2
+		exit 1
 	fi
 }
 
diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest
index 14a03ea1e21d..abc706cf7702 100755
--- a/tools/testing/selftests/ftrace/ftracetest
+++ b/tools/testing/selftests/ftrace/ftracetest
@@ -8,15 +8,18 @@
 # Released under the terms of the GPL v2.
 
 usage() { # errno [message]
-[ "$2" ] && echo $2
+[ ! -z "$2" ] && echo $2
 echo "Usage: ftracetest [options] [testcase(s)] [testcase-directory(s)]"
 echo " Options:"
 echo "		-h|--help  Show help message"
 echo "		-k|--keep  Keep passed test logs"
 echo "		-v|--verbose Increase verbosity of test messages"
 echo "		-vv        Alias of -v -v (Show all results in stdout)"
+echo "		-vvv       Alias of -v -v -v (Show all commands immediately)"
+echo "		--fail-unsupported Treat UNSUPPORTED as a failure"
 echo "		-d|--debug Debug mode (trace all shell commands)"
 echo "		-l|--logdir <dir> Save logs on the <dir>"
+echo "		            If <dir> is -, all logs output in console only"
 exit $1
 }
 
@@ -47,7 +50,7 @@ parse_opts() { # opts
   local OPT_TEST_CASES=
   local OPT_TEST_DIR=
 
-  while [ "$1" ]; do
+  while [ ! -z "$1" ]; do
     case "$1" in
     --help|-h)
       usage 0
@@ -56,15 +59,20 @@ parse_opts() { # opts
       KEEP_LOG=1
       shift 1
     ;;
-    --verbose|-v|-vv)
+    --verbose|-v|-vv|-vvv)
       VERBOSE=$((VERBOSE + 1))
       [ $1 = '-vv' ] && VERBOSE=$((VERBOSE + 1))
+      [ $1 = '-vvv' ] && VERBOSE=$((VERBOSE + 2))
       shift 1
     ;;
     --debug|-d)
       DEBUG=1
       shift 1
     ;;
+    --fail-unsupported)
+      UNSUPPORTED_RESULT=1
+      shift 1
+    ;;
     --logdir|-l)
       LOG_DIR=$2
       shift 2
@@ -88,7 +96,7 @@ parse_opts() { # opts
     ;;
     esac
   done
-  if [ "$OPT_TEST_CASES" ]; then
+  if [ ! -z "$OPT_TEST_CASES" ]; then
     TEST_CASES=$OPT_TEST_CASES
   fi
 }
@@ -108,6 +116,7 @@ LOG_DIR=$TOP_DIR/logs/`date +%Y%m%d-%H%M%S`/
 KEEP_LOG=0
 DEBUG=0
 VERBOSE=0
+UNSUPPORTED_RESULT=0
 # Parse command-line options
 parse_opts $*
 
@@ -119,14 +128,20 @@ if [ -z "$TRACING_DIR" -o ! -d "$TRACING_DIR" ]; then
 fi
 
 # Preparing logs
-LOG_FILE=$LOG_DIR/ftracetest.log
-mkdir -p $LOG_DIR || errexit "Failed to make a log directory: $LOG_DIR"
-date > $LOG_FILE
+if [ "x$LOG_DIR" = "x-" ]; then
+  LOG_FILE=
+  date
+else
+  LOG_FILE=$LOG_DIR/ftracetest.log
+  mkdir -p $LOG_DIR || errexit "Failed to make a log directory: $LOG_DIR"
+  date > $LOG_FILE
+fi
+
 prlog() { # messages
-  echo "$@" | tee -a $LOG_FILE
+  [ -z "$LOG_FILE" ] && echo "$@" || echo "$@" | tee -a $LOG_FILE
 }
 catlog() { #file
-  cat $1 | tee -a $LOG_FILE
+  [ -z "$LOG_FILE" ] && cat $1 || cat $1 | tee -a $LOG_FILE
 }
 prlog "=== Ftrace unit tests ==="
 
@@ -187,7 +202,7 @@ eval_result() { # sigval
     $UNSUPPORTED)
       prlog "	[UNSUPPORTED]"
       UNSUPPORTED_CASES="$UNSUPPORTED_CASES $CASENO"
-      return 1 # this is not a bug, but the result should be reported.
+      return $UNSUPPORTED_RESULT # depends on use case
     ;;
     $XFAIL)
       prlog "	[XFAIL]"
@@ -247,12 +262,20 @@ __run_test() { # testfile
 # Run one test case
 run_test() { # testfile
   local testname=`basename $1`
-  local testlog=`mktemp $LOG_DIR/${testname}-log.XXXXXX`
+  if [ ! -z "$LOG_FILE" ] ; then
+    local testlog=`mktemp $LOG_DIR/${testname}-log.XXXXXX`
+  else
+    local testlog=/proc/self/fd/1
+  fi
   export TMPDIR=`mktemp -d /tmp/ftracetest-dir.XXXXXX`
   testcase $1
   echo "execute$INSTANCE: "$1 > $testlog
   SIG_RESULT=0
-  if [ $VERBOSE -ge 2 ]; then
+  if [ -z "$LOG_FILE" ]; then
+    __run_test $1 2>&1
+  elif [ $VERBOSE -ge 3 ]; then
+    __run_test $1 | tee -a $testlog 2>&1
+  elif [ $VERBOSE -eq 2 ]; then
     __run_test $1 2>> $testlog | tee -a $testlog
   else
     __run_test $1 >> $testlog 2>&1
@@ -260,9 +283,9 @@ run_test() { # testfile
   eval_result $SIG_RESULT
   if [ $? -eq 0 ]; then
     # Remove test log if the test was done as it was expected.
-    [ $KEEP_LOG -eq 0 ] && rm $testlog
+    [ $KEEP_LOG -eq 0 -a ! -z "$LOG_FILE" ] && rm $testlog
   else
-    [ $VERBOSE -ge 1 ] && catlog $testlog
+    [ $VERBOSE -eq 1 -o $VERBOSE -eq 2 ] && catlog $testlog
     TOTAL_RESULT=1
   fi
   rm -rf $TMPDIR
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
index 2a1cb9908746..a4fd4c851a5b 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
@@ -1,6 +1,8 @@
 #!/bin/sh
 # description: Register/unregister many kprobe events
 
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
 # ftrace fentry skip size depends on the machine architecture.
 # Currently HAVE_KPROBES_ON_FTRACE defined on x86 and powerpc64le
 case `uname -m` in
diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile
index 7c647f619d63..f0c0369ccb79 100644
--- a/tools/testing/selftests/futex/Makefile
+++ b/tools/testing/selftests/futex/Makefile
@@ -7,14 +7,17 @@ TEST_PROGS := run.sh
 include ../lib.mk
 
 all:
-	for DIR in $(SUBDIRS); do		\
+	@for DIR in $(SUBDIRS); do		\
 		BUILD_TARGET=$(OUTPUT)/$$DIR;	\
 		mkdir $$BUILD_TARGET  -p;	\
 		make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+		if [ -e $$DIR/$(TEST_PROGS) ]; then
+			rsync -a $$DIR/$(TEST_PROGS) $$BUILD_TARGET/;
+		fi
 	done
 
 override define RUN_TESTS
-	$(OUTPUT)/run.sh
+	@cd $(OUTPUT); ./run.sh
 endef
 
 override define INSTALL_RULE
@@ -33,7 +36,7 @@ override define EMIT_TESTS
 endef
 
 override define CLEAN
-	for DIR in $(SUBDIRS); do		\
+	@for DIR in $(SUBDIRS); do		\
 		BUILD_TARGET=$(OUTPUT)/$$DIR;	\
 		mkdir $$BUILD_TARGET  -p;	\
 		make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi.c b/tools/testing/selftests/futex/functional/futex_requeue_pi.c
index d24ab7421e73..54cd5c414e82 100644
--- a/tools/testing/selftests/futex/functional/futex_requeue_pi.c
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi.c
@@ -394,9 +394,11 @@ int main(int argc, char *argv[])
 		}
 	}
 
-	printf("%s: Test requeue functionality\n", basename(argv[0]));
-	printf("\tArguments: broadcast=%d locked=%d owner=%d timeout=%ldns\n",
-	       broadcast, locked, owner, timeout_ns);
+	ksft_print_header();
+	ksft_print_msg("%s: Test requeue functionality\n", basename(argv[0]));
+	ksft_print_msg(
+		"\tArguments: broadcast=%d locked=%d owner=%d timeout=%ldns\n",
+		broadcast, locked, owner, timeout_ns);
 
 	/*
 	 * FIXME: unit_test is obsolete now that we parse options and the
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c
index e0a798ad0d21..08187a16507f 100644
--- a/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c
@@ -78,7 +78,8 @@ int main(int argc, char *argv[])
 		}
 	}
 
-	printf("%s: Detect mismatched requeue_pi operations\n",
+	ksft_print_header();
+	ksft_print_msg("%s: Detect mismatched requeue_pi operations\n",
 	       basename(argv[0]));
 
 	if (pthread_create(&child, NULL, blocking_child, NULL)) {
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c
index 982f83577501..f0542a344d95 100644
--- a/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c
@@ -143,9 +143,10 @@ int main(int argc, char *argv[])
 		}
 	}
 
-	printf("%s: Test signal handling during requeue_pi\n",
+	ksft_print_header();
+	ksft_print_msg("%s: Test signal handling during requeue_pi\n",
 	       basename(argv[0]));
-	printf("\tArguments: <none>\n");
+	ksft_print_msg("\tArguments: <none>\n");
 
 	sa.sa_handler = handle_signal;
 	sigemptyset(&sa.sa_mask);
diff --git a/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c
index bdc48dc047e5..6216de828093 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c
@@ -97,8 +97,10 @@ int main(int argc, char **argv)
 		}
 	}
 
-	printf("%s: Test the futex value of private file mappings in FUTEX_WAIT\n",
-	       basename(argv[0]));
+	ksft_print_header();
+	ksft_print_msg(
+		"%s: Test the futex value of private file mappings in FUTEX_WAIT\n",
+		basename(argv[0]));
 
 	ret = pthread_create(&thr, NULL, thr_futex_wait, NULL);
 	if (ret < 0) {
diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
index 6aadd560366e..bab3dfe1787f 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
@@ -68,9 +68,10 @@ int main(int argc, char *argv[])
 		}
 	}
 
-	printf("%s: Block on a futex and wait for timeout\n",
+	ksft_print_header();
+	ksft_print_msg("%s: Block on a futex and wait for timeout\n",
 	       basename(argv[0]));
-	printf("\tArguments: timeout=%ldns\n", timeout_ns);
+	ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns);
 
 	/* initialize timeout */
 	to.tv_sec = 0;
diff --git a/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c
index d237a8b702f0..26975322545b 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c
@@ -99,7 +99,8 @@ int main(int argc, char **argv)
 		exit(1);
 	}
 
-	printf("%s: Test the uninitialized futex value in FUTEX_WAIT\n",
+	ksft_print_header();
+	ksft_print_msg("%s: Test the uninitialized futex value in FUTEX_WAIT\n",
 	       basename(argv[0]));
 
 
diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
index 9a2c56fa7305..da15a63269b4 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
@@ -64,7 +64,8 @@ int main(int argc, char *argv[])
 		}
 	}
 
-	printf("%s: Test the unexpected futex value in FUTEX_WAIT\n",
+	ksft_print_header();
+	ksft_print_msg("%s: Test the unexpected futex value in FUTEX_WAIT\n",
 	       basename(argv[0]));
 
 	info("Calling futex_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1);
diff --git a/tools/testing/selftests/futex/include/logging.h b/tools/testing/selftests/futex/include/logging.h
index 4e7944984fbb..01989644e50a 100644
--- a/tools/testing/selftests/futex/include/logging.h
+++ b/tools/testing/selftests/futex/include/logging.h
@@ -109,22 +109,20 @@ void log_verbosity(int level)
  */
 void print_result(const char *test_name, int ret)
 {
-	const char *result = "Unknown return code";
-
 	switch (ret) {
 	case RET_PASS:
-		ksft_inc_pass_cnt();
-		result = PASS;
-		break;
+		ksft_test_result_pass("%s\n", test_name);
+		ksft_print_cnts();
+		return;
 	case RET_ERROR:
-		result = ERROR;
-		break;
+		ksft_test_result_error("%s\n", test_name);
+		ksft_print_cnts();
+		return;
 	case RET_FAIL:
-		ksft_inc_fail_cnt();
-		result = FAIL;
-		break;
+		ksft_test_result_fail("%s\n", test_name);
+		ksft_print_cnts();
+		return;
 	}
-	printf("selftests: %s [%s]\n", test_name, result);
 }
 
 /* log level macros */
diff --git a/tools/testing/selftests/intel_pstate/Makefile b/tools/testing/selftests/intel_pstate/Makefile
index 849a90ffe8dd..a97e24edde39 100644
--- a/tools/testing/selftests/intel_pstate/Makefile
+++ b/tools/testing/selftests/intel_pstate/Makefile
@@ -1,7 +1,9 @@
 CFLAGS := $(CFLAGS) -Wall -D_GNU_SOURCE
 LDLIBS := $(LDLIBS) -lm
 
+ifeq (,$(filter $(ARCH),x86))
 TEST_GEN_FILES := msr aperf
+endif
 
 TEST_PROGS := run.sh
 
diff --git a/tools/testing/selftests/intel_pstate/run.sh b/tools/testing/selftests/intel_pstate/run.sh
index 7868c106b8b1..d3ab48f91cd6 100755
--- a/tools/testing/selftests/intel_pstate/run.sh
+++ b/tools/testing/selftests/intel_pstate/run.sh
@@ -29,13 +29,12 @@
 
 EVALUATE_ONLY=0
 
-max_cpus=$(($(nproc)-1))
+if ! uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ | grep -q x86; then
+	echo "$0 # Skipped: Test can only run on x86 architectures."
+	exit 0
+fi
 
-# compile programs
-gcc aperf.c -Wall -D_GNU_SOURCE -o aperf  -lm
-[ $? -ne 0 ] && echo "Problem compiling aperf.c." && exit 1
-gcc -o msr msr.c -lm
-[ $? -ne 0 ] && echo "Problem compiling msr.c." && exit 1
+max_cpus=$(($(nproc)-1))
 
 function run_test () {
 
diff --git a/tools/testing/selftests/kcmp/kcmp_test.c b/tools/testing/selftests/kcmp/kcmp_test.c
index a5a4da856dfe..73684c4a1ed6 100644
--- a/tools/testing/selftests/kcmp/kcmp_test.c
+++ b/tools/testing/selftests/kcmp/kcmp_test.c
@@ -8,7 +8,6 @@
 #include <errno.h>
 #include <string.h>
 #include <fcntl.h>
-
 #include <linux/unistd.h>
 #include <linux/kcmp.h>
 
@@ -16,20 +15,28 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/wait.h>
+#include <sys/epoll.h>
 
 #include "../kselftest.h"
 
-static long sys_kcmp(int pid1, int pid2, int type, int fd1, int fd2)
+static long sys_kcmp(int pid1, int pid2, int type, unsigned long fd1, unsigned long fd2)
 {
 	return syscall(__NR_kcmp, pid1, pid2, type, fd1, fd2);
 }
 
+static const unsigned int duped_num = 64;
+
 int main(int argc, char **argv)
 {
 	const char kpath[] = "kcmp-test-file";
+	struct kcmp_epoll_slot epoll_slot;
+	struct epoll_event ev;
 	int pid1, pid2;
+	int pipefd[2];
 	int fd1, fd2;
+	int epollfd;
 	int status;
+	int fddup;
 
 	fd1 = open(kpath, O_RDWR | O_CREAT | O_TRUNC, 0644);
 	pid1 = getpid();
@@ -39,6 +46,37 @@ int main(int argc, char **argv)
 		ksft_exit_fail();
 	}
 
+	if (pipe(pipefd)) {
+		perror("Can't create pipe");
+		ksft_exit_fail();
+	}
+
+	epollfd = epoll_create1(0);
+	if (epollfd < 0) {
+		perror("epoll_create1 failed");
+		ksft_exit_fail();
+	}
+
+	memset(&ev, 0xff, sizeof(ev));
+	ev.events = EPOLLIN | EPOLLOUT;
+
+	if (epoll_ctl(epollfd, EPOLL_CTL_ADD, pipefd[0], &ev)) {
+		perror("epoll_ctl failed");
+		ksft_exit_fail();
+	}
+
+	fddup = dup2(pipefd[1], duped_num);
+	if (fddup < 0) {
+		perror("dup2 failed");
+		ksft_exit_fail();
+	}
+
+	if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fddup, &ev)) {
+		perror("epoll_ctl failed");
+		ksft_exit_fail();
+	}
+	close(fddup);
+
 	pid2 = fork();
 	if (pid2 < 0) {
 		perror("fork failed");
@@ -95,6 +133,24 @@ int main(int argc, char **argv)
 			ksft_inc_pass_cnt();
 		}
 
+		/* Compare epoll target */
+		epoll_slot = (struct kcmp_epoll_slot) {
+			.efd	= epollfd,
+			.tfd	= duped_num,
+			.toff	= 0,
+		};
+		ret = sys_kcmp(pid1, pid1, KCMP_EPOLL_TFD, pipefd[1],
+			       (unsigned long)(void *)&epoll_slot);
+		if (ret) {
+			printf("FAIL: 0 expected but %d returned (%s)\n",
+				ret, strerror(errno));
+			ksft_inc_fail_cnt();
+			ret = -1;
+		} else {
+			printf("PASS: 0 returned as expected\n");
+			ksft_inc_pass_cnt();
+		}
+
 		ksft_print_cnts();
 
 		if (ret)
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index 08e90c2cc5cb..1ae565ed9bf0 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -19,7 +19,8 @@
 #define KSFT_FAIL  1
 #define KSFT_XFAIL 2
 #define KSFT_XPASS 3
-#define KSFT_SKIP  4
+/* Treat skip as pass */
+#define KSFT_SKIP  KSFT_PASS
 
 /* counters */
 struct ksft_count {
@@ -28,6 +29,7 @@ struct ksft_count {
 	unsigned int ksft_xfail;
 	unsigned int ksft_xpass;
 	unsigned int ksft_xskip;
+	unsigned int ksft_error;
 };
 
 static struct ksft_count ksft_cnt;
@@ -36,7 +38,7 @@ static inline int ksft_test_num(void)
 {
 	return ksft_cnt.ksft_pass + ksft_cnt.ksft_fail +
 		ksft_cnt.ksft_xfail + ksft_cnt.ksft_xpass +
-		ksft_cnt.ksft_xskip;
+		ksft_cnt.ksft_xskip + ksft_cnt.ksft_error;
 }
 
 static inline void ksft_inc_pass_cnt(void) { ksft_cnt.ksft_pass++; }
@@ -44,6 +46,14 @@ static inline void ksft_inc_fail_cnt(void) { ksft_cnt.ksft_fail++; }
 static inline void ksft_inc_xfail_cnt(void) { ksft_cnt.ksft_xfail++; }
 static inline void ksft_inc_xpass_cnt(void) { ksft_cnt.ksft_xpass++; }
 static inline void ksft_inc_xskip_cnt(void) { ksft_cnt.ksft_xskip++; }
+static inline void ksft_inc_error_cnt(void) { ksft_cnt.ksft_error++; }
+
+static inline int ksft_get_pass_cnt(void) { return ksft_cnt.ksft_pass; }
+static inline int ksft_get_fail_cnt(void) { return ksft_cnt.ksft_fail; }
+static inline int ksft_get_xfail_cnt(void) { return ksft_cnt.ksft_xfail; }
+static inline int ksft_get_xpass_cnt(void) { return ksft_cnt.ksft_xpass; }
+static inline int ksft_get_xskip_cnt(void) { return ksft_cnt.ksft_xskip; }
+static inline int ksft_get_error_cnt(void) { return ksft_cnt.ksft_error; }
 
 static inline void ksft_print_header(void)
 {
@@ -52,6 +62,10 @@ static inline void ksft_print_header(void)
 
 static inline void ksft_print_cnts(void)
 {
+	printf("Pass %d Fail %d Xfail %d Xpass %d Skip %d Error %d\n",
+		ksft_cnt.ksft_pass, ksft_cnt.ksft_fail,
+		ksft_cnt.ksft_xfail, ksft_cnt.ksft_xpass,
+		ksft_cnt.ksft_xskip, ksft_cnt.ksft_error);
 	printf("1..%d\n", ksft_test_num());
 }
 
@@ -101,6 +115,18 @@ static inline void ksft_test_result_skip(const char *msg, ...)
 	va_end(args);
 }
 
+static inline void ksft_test_result_error(const char *msg, ...)
+{
+	va_list args;
+
+	ksft_cnt.ksft_error++;
+
+	va_start(args, msg);
+	printf("not ok %d # error ", ksft_test_num());
+	vprintf(msg, args);
+	va_end(args);
+}
+
 static inline int ksft_exit_pass(void)
 {
 	ksft_print_cnts();
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index c56f72e07cd7..e81bd28bdd89 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -51,6 +51,9 @@
 #define __KSELFTEST_HARNESS_H
 
 #define _GNU_SOURCE
+#include <asm/types.h>
+#include <errno.h>
+#include <stdbool.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -84,6 +87,14 @@
  * E.g., #define TH_LOG_ENABLED 1
  *
  * If no definition is provided, logging is enabled by default.
+ *
+ * If there is no way to print an error message for the process running the
+ * test (e.g. not allowed to write to stderr), it is still possible to get the
+ * ASSERT_* number for which the test failed.  This behavior can be enabled by
+ * writing `_metadata->no_print = true;` before the check sequence that is
+ * unable to print.  When an error occur, instead of printing an error message
+ * and calling `abort(3)`, the test process call `_exit(2)` with the assert
+ * number as argument, which is then printed by the parent process.
  */
 #define TH_LOG(fmt, ...) do { \
 	if (TH_LOG_ENABLED) \
@@ -555,12 +566,18 @@
  * return while still providing an optional block to the API consumer.
  */
 #define OPTIONAL_HANDLER(_assert) \
-	for (; _metadata->trigger;  _metadata->trigger = __bail(_assert))
+	for (; _metadata->trigger; _metadata->trigger = \
+			__bail(_assert, _metadata->no_print, _metadata->step))
+
+#define __INC_STEP(_metadata) \
+	if (_metadata->passed && _metadata->step < 255) \
+		_metadata->step++;
 
 #define __EXPECT(_expected, _seen, _t, _assert) do { \
 	/* Avoid multiple evaluation of the cases */ \
 	__typeof__(_expected) __exp = (_expected); \
 	__typeof__(_seen) __seen = (_seen); \
+	if (_assert) __INC_STEP(_metadata); \
 	if (!(__exp _t __seen)) { \
 		unsigned long long __exp_print = (uintptr_t)__exp; \
 		unsigned long long __seen_print = (uintptr_t)__seen; \
@@ -576,6 +593,7 @@
 #define __EXPECT_STR(_expected, _seen, _t, _assert) do { \
 	const char *__exp = (_expected); \
 	const char *__seen = (_seen); \
+	if (_assert) __INC_STEP(_metadata); \
 	if (!(strcmp(__exp, __seen) _t 0))  { \
 		__TH_LOG("Expected '%s' %s '%s'.", __exp, #_t, __seen); \
 		_metadata->passed = 0; \
@@ -590,6 +608,8 @@ struct __test_metadata {
 	int termsig;
 	int passed;
 	int trigger; /* extra handler after the evaluation */
+	__u8 step;
+	bool no_print; /* manual trigger when TH_LOG_STREAM is not available */
 	struct __test_metadata *prev, *next;
 };
 
@@ -634,10 +654,13 @@ static inline void __register_test(struct __test_metadata *t)
 	}
 }
 
-static inline int __bail(int for_realz)
+static inline int __bail(int for_realz, bool no_print, __u8 step)
 {
-	if (for_realz)
+	if (for_realz) {
+		if (no_print)
+			_exit(step);
 		abort();
+	}
 	return 0;
 }
 
@@ -655,18 +678,24 @@ void __run_test(struct __test_metadata *t)
 		t->passed = 0;
 	} else if (child_pid == 0) {
 		t->fn(t);
-		_exit(t->passed);
+		/* return the step that failed or 0 */
+		_exit(t->passed ? 0 : t->step);
 	} else {
 		/* TODO(wad) add timeout support. */
 		waitpid(child_pid, &status, 0);
 		if (WIFEXITED(status)) {
-			t->passed = t->termsig == -1 ? WEXITSTATUS(status) : 0;
+			t->passed = t->termsig == -1 ? !WEXITSTATUS(status) : 0;
 			if (t->termsig != -1) {
 				fprintf(TH_LOG_STREAM,
 					"%s: Test exited normally "
 					"instead of by signal (code: %d)\n",
 					t->name,
 					WEXITSTATUS(status));
+			} else if (!t->passed) {
+				fprintf(TH_LOG_STREAM,
+					"%s: Test failed at step #%d\n",
+					t->name,
+					WEXITSTATUS(status));
 			}
 		} else if (WIFSIGNALED(status)) {
 			t->passed = 0;
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 959273c3a52e..f65886af7c0c 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -6,20 +6,49 @@ ifeq (0,$(MAKELEVEL))
 OUTPUT := $(shell pwd)
 endif
 
+# The following are built by lib.mk common compile rules.
+# TEST_CUSTOM_PROGS should be used by tests that require
+# custom build rule and prevent common build rule use.
+# TEST_PROGS are for test shell scripts.
+# TEST_CUSTOM_PROGS and TEST_PROGS will be run by common run_tests
+# and install targets. Common clean doesn't touch them.
 TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS))
+TEST_GEN_PROGS_EXTENDED := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS_EXTENDED))
 TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES))
 
 all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
 
+.ONESHELL:
 define RUN_TESTS
-	@for TEST in $(TEST_GEN_PROGS) $(TEST_PROGS); do \
+	@test_num=`echo 0`;
+	@echo "TAP version 13";
+	@for TEST in $(1); do				\
 		BASENAME_TEST=`basename $$TEST`;	\
-		cd `dirname $$TEST`; (./$$BASENAME_TEST && echo "selftests: $$BASENAME_TEST [PASS]") || echo "selftests:  $$BASENAME_TEST [FAIL]"; cd -;\
+		test_num=`echo $$test_num+1 | bc`;	\
+		echo "selftests: $$BASENAME_TEST";	\
+		echo "========================================";	\
+		if [ ! -x $$TEST ]; then	\
+			echo "selftests: Warning: file $$BASENAME_TEST is not executable, correct this.";\
+			echo "not ok 1..$$test_num selftests: $$BASENAME_TEST [FAIL]"; \
+		else					\
+			cd `dirname $$TEST` > /dev/null; (./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && echo "ok 1..$$test_num selftests: $$BASENAME_TEST [PASS]") || echo "not ok 1..$$test_num selftests:  $$BASENAME_TEST [FAIL]"; cd - > /dev/null;\
+		fi;					\
 	done;
 endef
 
 run_tests: all
-	$(RUN_TESTS)
+ifneq ($(KBUILD_SRC),)
+	@if [ "X$(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)" != "X" ]; then
+		@rsync -aq $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(OUTPUT)
+	fi
+	@if [ "X$(TEST_PROGS)" != "X" ]; then
+		$(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(OUTPUT)/$(TEST_PROGS))
+	else
+		$(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS))
+	fi
+else
+	$(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS))
+endif
 
 define INSTALL_RULE
 	@if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then					\
@@ -27,10 +56,10 @@ define INSTALL_RULE
 		echo "rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/";	\
 		rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/;		\
 	fi
-	@if [ "X$(TEST_GEN_PROGS)$(TEST_GEN_PROGS_EXTENDED)$(TEST_GEN_FILES)" != "X" ]; then					\
+	@if [ "X$(TEST_GEN_PROGS)$(TEST_CUSTOM_PROGS)$(TEST_GEN_PROGS_EXTENDED)$(TEST_GEN_FILES)" != "X" ]; then					\
 		mkdir -p ${INSTALL_PATH};										\
-		echo "rsync -a $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/";	\
-		rsync -a $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/;		\
+		echo "rsync -a $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/";	\
+		rsync -a $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/;		\
 	fi
 endef
 
@@ -42,15 +71,20 @@ else
 endif
 
 define EMIT_TESTS
-	@for TEST in $(TEST_GEN_PROGS) $(TEST_PROGS); do \
+	@for TEST in $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS); do \
 		BASENAME_TEST=`basename $$TEST`;	\
-		echo "(./$$BASENAME_TEST && echo \"selftests: $$BASENAME_TEST [PASS]\") || echo \"selftests: $$BASENAME_TEST [FAIL]\""; \
+		echo "(./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && echo \"selftests: $$BASENAME_TEST [PASS]\") || echo \"selftests: $$BASENAME_TEST [FAIL]\""; \
 	done;
 endef
 
 emit_tests:
 	$(EMIT_TESTS)
 
+# define if isn't already. It is undefined in make O= case.
+ifeq ($(RM),)
+RM := rm -f
+endif
+
 define CLEAN
 	$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
 endef
@@ -58,6 +92,15 @@ endef
 clean:
 	$(CLEAN)
 
+# When make O= with kselftest target from main level
+# the following aren't defined.
+#
+ifneq ($(KBUILD_SRC),)
+LINK.c = $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH)
+COMPILE.S = $(CC) $(ASFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c
+LINK.S = $(CC) $(ASFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH)
+endif
+
 $(OUTPUT)/%:%.c
 	$(LINK.c) $^ $(LDLIBS) -o $@
 
diff --git a/tools/testing/selftests/memfd/Makefile b/tools/testing/selftests/memfd/Makefile
index ad8a0897e47f..bc9d02d615da 100644
--- a/tools/testing/selftests/memfd/Makefile
+++ b/tools/testing/selftests/memfd/Makefile
@@ -3,7 +3,7 @@ CFLAGS += -I../../../../include/uapi/
 CFLAGS += -I../../../../include/
 CFLAGS += -I../../../../usr/include/
 
-TEST_PROGS := run_fuse_test.sh
+TEST_PROGS := run_tests.sh
 TEST_GEN_FILES := memfd_test fuse_mnt fuse_test
 
 fuse_mnt.o: CFLAGS += $(shell pkg-config fuse --cflags)
diff --git a/tools/testing/selftests/memfd/fuse_test.c b/tools/testing/selftests/memfd/fuse_test.c
index 67908b18f035..7f3617274bf5 100644
--- a/tools/testing/selftests/memfd/fuse_test.c
+++ b/tools/testing/selftests/memfd/fuse_test.c
@@ -33,7 +33,7 @@
 #include <unistd.h>
 
 #define MFD_DEF_SIZE 8192
-#define STACK_SIZE 65535
+#define STACK_SIZE 65536
 
 static int sys_memfd_create(const char *name,
 			    unsigned int flags)
diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
index 26546892cd54..f94c6d1fb46f 100644
--- a/tools/testing/selftests/memfd/memfd_test.c
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -18,12 +18,48 @@
 #include <sys/wait.h>
 #include <unistd.h>
 
+#define MEMFD_STR	"memfd:"
+#define SHARED_FT_STR	"(shared file-table)"
+
 #define MFD_DEF_SIZE 8192
 #define STACK_SIZE 65536
 
+/*
+ * Default is not to test hugetlbfs
+ */
+static int hugetlbfs_test;
+static size_t mfd_def_size = MFD_DEF_SIZE;
+
+/*
+ * Copied from mlock2-tests.c
+ */
+static unsigned long default_huge_page_size(void)
+{
+	unsigned long hps = 0;
+	char *line = NULL;
+	size_t linelen = 0;
+	FILE *f = fopen("/proc/meminfo", "r");
+
+	if (!f)
+		return 0;
+	while (getline(&line, &linelen, f) > 0) {
+		if (sscanf(line, "Hugepagesize:       %lu kB", &hps) == 1) {
+			hps <<= 10;
+			break;
+		}
+	}
+
+	free(line);
+	fclose(f);
+	return hps;
+}
+
 static int sys_memfd_create(const char *name,
 			    unsigned int flags)
 {
+	if (hugetlbfs_test)
+		flags |= MFD_HUGETLB;
+
 	return syscall(__NR_memfd_create, name, flags);
 }
 
@@ -150,7 +186,7 @@ static void *mfd_assert_mmap_shared(int fd)
 	void *p;
 
 	p = mmap(NULL,
-		 MFD_DEF_SIZE,
+		 mfd_def_size,
 		 PROT_READ | PROT_WRITE,
 		 MAP_SHARED,
 		 fd,
@@ -168,7 +204,7 @@ static void *mfd_assert_mmap_private(int fd)
 	void *p;
 
 	p = mmap(NULL,
-		 MFD_DEF_SIZE,
+		 mfd_def_size,
 		 PROT_READ,
 		 MAP_PRIVATE,
 		 fd,
@@ -223,7 +259,7 @@ static void mfd_assert_read(int fd)
 
 	/* verify PROT_READ *is* allowed */
 	p = mmap(NULL,
-		 MFD_DEF_SIZE,
+		 mfd_def_size,
 		 PROT_READ,
 		 MAP_PRIVATE,
 		 fd,
@@ -232,11 +268,11 @@ static void mfd_assert_read(int fd)
 		printf("mmap() failed: %m\n");
 		abort();
 	}
-	munmap(p, MFD_DEF_SIZE);
+	munmap(p, mfd_def_size);
 
 	/* verify MAP_PRIVATE is *always* allowed (even writable) */
 	p = mmap(NULL,
-		 MFD_DEF_SIZE,
+		 mfd_def_size,
 		 PROT_READ | PROT_WRITE,
 		 MAP_PRIVATE,
 		 fd,
@@ -245,7 +281,7 @@ static void mfd_assert_read(int fd)
 		printf("mmap() failed: %m\n");
 		abort();
 	}
-	munmap(p, MFD_DEF_SIZE);
+	munmap(p, mfd_def_size);
 }
 
 static void mfd_assert_write(int fd)
@@ -254,16 +290,22 @@ static void mfd_assert_write(int fd)
 	void *p;
 	int r;
 
-	/* verify write() succeeds */
-	l = write(fd, "\0\0\0\0", 4);
-	if (l != 4) {
-		printf("write() failed: %m\n");
-		abort();
+	/*
+	 * huegtlbfs does not support write, but we want to
+	 * verify everything else here.
+	 */
+	if (!hugetlbfs_test) {
+		/* verify write() succeeds */
+		l = write(fd, "\0\0\0\0", 4);
+		if (l != 4) {
+			printf("write() failed: %m\n");
+			abort();
+		}
 	}
 
 	/* verify PROT_READ | PROT_WRITE is allowed */
 	p = mmap(NULL,
-		 MFD_DEF_SIZE,
+		 mfd_def_size,
 		 PROT_READ | PROT_WRITE,
 		 MAP_SHARED,
 		 fd,
@@ -273,11 +315,11 @@ static void mfd_assert_write(int fd)
 		abort();
 	}
 	*(char *)p = 0;
-	munmap(p, MFD_DEF_SIZE);
+	munmap(p, mfd_def_size);
 
 	/* verify PROT_WRITE is allowed */
 	p = mmap(NULL,
-		 MFD_DEF_SIZE,
+		 mfd_def_size,
 		 PROT_WRITE,
 		 MAP_SHARED,
 		 fd,
@@ -287,12 +329,12 @@ static void mfd_assert_write(int fd)
 		abort();
 	}
 	*(char *)p = 0;
-	munmap(p, MFD_DEF_SIZE);
+	munmap(p, mfd_def_size);
 
 	/* verify PROT_READ with MAP_SHARED is allowed and a following
 	 * mprotect(PROT_WRITE) allows writing */
 	p = mmap(NULL,
-		 MFD_DEF_SIZE,
+		 mfd_def_size,
 		 PROT_READ,
 		 MAP_SHARED,
 		 fd,
@@ -302,20 +344,20 @@ static void mfd_assert_write(int fd)
 		abort();
 	}
 
-	r = mprotect(p, MFD_DEF_SIZE, PROT_READ | PROT_WRITE);
+	r = mprotect(p, mfd_def_size, PROT_READ | PROT_WRITE);
 	if (r < 0) {
 		printf("mprotect() failed: %m\n");
 		abort();
 	}
 
 	*(char *)p = 0;
-	munmap(p, MFD_DEF_SIZE);
+	munmap(p, mfd_def_size);
 
 	/* verify PUNCH_HOLE works */
 	r = fallocate(fd,
 		      FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
 		      0,
-		      MFD_DEF_SIZE);
+		      mfd_def_size);
 	if (r < 0) {
 		printf("fallocate(PUNCH_HOLE) failed: %m\n");
 		abort();
@@ -337,7 +379,7 @@ static void mfd_fail_write(int fd)
 
 	/* verify PROT_READ | PROT_WRITE is not allowed */
 	p = mmap(NULL,
-		 MFD_DEF_SIZE,
+		 mfd_def_size,
 		 PROT_READ | PROT_WRITE,
 		 MAP_SHARED,
 		 fd,
@@ -349,7 +391,7 @@ static void mfd_fail_write(int fd)
 
 	/* verify PROT_WRITE is not allowed */
 	p = mmap(NULL,
-		 MFD_DEF_SIZE,
+		 mfd_def_size,
 		 PROT_WRITE,
 		 MAP_SHARED,
 		 fd,
@@ -362,13 +404,13 @@ static void mfd_fail_write(int fd)
 	/* Verify PROT_READ with MAP_SHARED with a following mprotect is not
 	 * allowed. Note that for r/w the kernel already prevents the mmap. */
 	p = mmap(NULL,
-		 MFD_DEF_SIZE,
+		 mfd_def_size,
 		 PROT_READ,
 		 MAP_SHARED,
 		 fd,
 		 0);
 	if (p != MAP_FAILED) {
-		r = mprotect(p, MFD_DEF_SIZE, PROT_READ | PROT_WRITE);
+		r = mprotect(p, mfd_def_size, PROT_READ | PROT_WRITE);
 		if (r >= 0) {
 			printf("mmap()+mprotect() didn't fail as expected\n");
 			abort();
@@ -379,7 +421,7 @@ static void mfd_fail_write(int fd)
 	r = fallocate(fd,
 		      FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
 		      0,
-		      MFD_DEF_SIZE);
+		      mfd_def_size);
 	if (r >= 0) {
 		printf("fallocate(PUNCH_HOLE) didn't fail as expected\n");
 		abort();
@@ -390,13 +432,13 @@ static void mfd_assert_shrink(int fd)
 {
 	int r, fd2;
 
-	r = ftruncate(fd, MFD_DEF_SIZE / 2);
+	r = ftruncate(fd, mfd_def_size / 2);
 	if (r < 0) {
 		printf("ftruncate(SHRINK) failed: %m\n");
 		abort();
 	}
 
-	mfd_assert_size(fd, MFD_DEF_SIZE / 2);
+	mfd_assert_size(fd, mfd_def_size / 2);
 
 	fd2 = mfd_assert_open(fd,
 			      O_RDWR | O_CREAT | O_TRUNC,
@@ -410,7 +452,7 @@ static void mfd_fail_shrink(int fd)
 {
 	int r;
 
-	r = ftruncate(fd, MFD_DEF_SIZE / 2);
+	r = ftruncate(fd, mfd_def_size / 2);
 	if (r >= 0) {
 		printf("ftruncate(SHRINK) didn't fail as expected\n");
 		abort();
@@ -425,31 +467,31 @@ static void mfd_assert_grow(int fd)
 {
 	int r;
 
-	r = ftruncate(fd, MFD_DEF_SIZE * 2);
+	r = ftruncate(fd, mfd_def_size * 2);
 	if (r < 0) {
 		printf("ftruncate(GROW) failed: %m\n");
 		abort();
 	}
 
-	mfd_assert_size(fd, MFD_DEF_SIZE * 2);
+	mfd_assert_size(fd, mfd_def_size * 2);
 
 	r = fallocate(fd,
 		      0,
 		      0,
-		      MFD_DEF_SIZE * 4);
+		      mfd_def_size * 4);
 	if (r < 0) {
 		printf("fallocate(ALLOC) failed: %m\n");
 		abort();
 	}
 
-	mfd_assert_size(fd, MFD_DEF_SIZE * 4);
+	mfd_assert_size(fd, mfd_def_size * 4);
 }
 
 static void mfd_fail_grow(int fd)
 {
 	int r;
 
-	r = ftruncate(fd, MFD_DEF_SIZE * 2);
+	r = ftruncate(fd, mfd_def_size * 2);
 	if (r >= 0) {
 		printf("ftruncate(GROW) didn't fail as expected\n");
 		abort();
@@ -458,7 +500,7 @@ static void mfd_fail_grow(int fd)
 	r = fallocate(fd,
 		      0,
 		      0,
-		      MFD_DEF_SIZE * 4);
+		      mfd_def_size * 4);
 	if (r >= 0) {
 		printf("fallocate(ALLOC) didn't fail as expected\n");
 		abort();
@@ -467,25 +509,37 @@ static void mfd_fail_grow(int fd)
 
 static void mfd_assert_grow_write(int fd)
 {
-	static char buf[MFD_DEF_SIZE * 8];
+	static char *buf;
 	ssize_t l;
 
-	l = pwrite(fd, buf, sizeof(buf), 0);
-	if (l != sizeof(buf)) {
+	buf = malloc(mfd_def_size * 8);
+	if (!buf) {
+		printf("malloc(%d) failed: %m\n", mfd_def_size * 8);
+		abort();
+	}
+
+	l = pwrite(fd, buf, mfd_def_size * 8, 0);
+	if (l != (mfd_def_size * 8)) {
 		printf("pwrite() failed: %m\n");
 		abort();
 	}
 
-	mfd_assert_size(fd, MFD_DEF_SIZE * 8);
+	mfd_assert_size(fd, mfd_def_size * 8);
 }
 
 static void mfd_fail_grow_write(int fd)
 {
-	static char buf[MFD_DEF_SIZE * 8];
+	static char *buf;
 	ssize_t l;
 
-	l = pwrite(fd, buf, sizeof(buf), 0);
-	if (l == sizeof(buf)) {
+	buf = malloc(mfd_def_size * 8);
+	if (!buf) {
+		printf("malloc(%d) failed: %m\n", mfd_def_size * 8);
+		abort();
+	}
+
+	l = pwrite(fd, buf, mfd_def_size * 8, 0);
+	if (l == (mfd_def_size * 8)) {
 		printf("pwrite() didn't fail as expected\n");
 		abort();
 	}
@@ -543,6 +597,8 @@ static void test_create(void)
 	char buf[2048];
 	int fd;
 
+	printf("%s CREATE\n", MEMFD_STR);
+
 	/* test NULL name */
 	mfd_fail_new(NULL, 0);
 
@@ -570,13 +626,18 @@ static void test_create(void)
 	fd = mfd_assert_new("", 0, MFD_CLOEXEC);
 	close(fd);
 
-	/* verify MFD_ALLOW_SEALING is allowed */
-	fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING);
-	close(fd);
-
-	/* verify MFD_ALLOW_SEALING | MFD_CLOEXEC is allowed */
-	fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING | MFD_CLOEXEC);
-	close(fd);
+	if (!hugetlbfs_test) {
+		/* verify MFD_ALLOW_SEALING is allowed */
+		fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING);
+		close(fd);
+
+		/* verify MFD_ALLOW_SEALING | MFD_CLOEXEC is allowed */
+		fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING | MFD_CLOEXEC);
+		close(fd);
+	} else {
+		/* sealing is not supported on hugetlbfs */
+		mfd_fail_new("", MFD_ALLOW_SEALING);
+	}
 }
 
 /*
@@ -587,8 +648,14 @@ static void test_basic(void)
 {
 	int fd;
 
+	/* hugetlbfs does not contain sealing support */
+	if (hugetlbfs_test)
+		return;
+
+	printf("%s BASIC\n", MEMFD_STR);
+
 	fd = mfd_assert_new("kern_memfd_basic",
-			    MFD_DEF_SIZE,
+			    mfd_def_size,
 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
 
 	/* add basic seals */
@@ -619,7 +686,7 @@ static void test_basic(void)
 
 	/* verify sealing does not work without MFD_ALLOW_SEALING */
 	fd = mfd_assert_new("kern_memfd_basic",
-			    MFD_DEF_SIZE,
+			    mfd_def_size,
 			    MFD_CLOEXEC);
 	mfd_assert_has_seals(fd, F_SEAL_SEAL);
 	mfd_fail_add_seals(fd, F_SEAL_SHRINK |
@@ -630,6 +697,28 @@ static void test_basic(void)
 }
 
 /*
+ * hugetlbfs doesn't support seals or write, so just verify grow and shrink
+ * on a hugetlbfs file created via memfd_create.
+ */
+static void test_hugetlbfs_grow_shrink(void)
+{
+	int fd;
+
+	printf("%s HUGETLBFS-GROW-SHRINK\n", MEMFD_STR);
+
+	fd = mfd_assert_new("kern_memfd_seal_write",
+			    mfd_def_size,
+			    MFD_CLOEXEC);
+
+	mfd_assert_read(fd);
+	mfd_assert_write(fd);
+	mfd_assert_shrink(fd);
+	mfd_assert_grow(fd);
+
+	close(fd);
+}
+
+/*
  * Test SEAL_WRITE
  * Test whether SEAL_WRITE actually prevents modifications.
  */
@@ -637,8 +726,17 @@ static void test_seal_write(void)
 {
 	int fd;
 
+	/*
+	 * hugetlbfs does not contain sealing or write support.  Just test
+	 * basic grow and shrink via test_hugetlbfs_grow_shrink.
+	 */
+	if (hugetlbfs_test)
+		return test_hugetlbfs_grow_shrink();
+
+	printf("%s SEAL-WRITE\n", MEMFD_STR);
+
 	fd = mfd_assert_new("kern_memfd_seal_write",
-			    MFD_DEF_SIZE,
+			    mfd_def_size,
 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
 	mfd_assert_has_seals(fd, 0);
 	mfd_assert_add_seals(fd, F_SEAL_WRITE);
@@ -661,8 +759,14 @@ static void test_seal_shrink(void)
 {
 	int fd;
 
+	/* hugetlbfs does not contain sealing support */
+	if (hugetlbfs_test)
+		return;
+
+	printf("%s SEAL-SHRINK\n", MEMFD_STR);
+
 	fd = mfd_assert_new("kern_memfd_seal_shrink",
-			    MFD_DEF_SIZE,
+			    mfd_def_size,
 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
 	mfd_assert_has_seals(fd, 0);
 	mfd_assert_add_seals(fd, F_SEAL_SHRINK);
@@ -685,8 +789,14 @@ static void test_seal_grow(void)
 {
 	int fd;
 
+	/* hugetlbfs does not contain sealing support */
+	if (hugetlbfs_test)
+		return;
+
+	printf("%s SEAL-GROW\n", MEMFD_STR);
+
 	fd = mfd_assert_new("kern_memfd_seal_grow",
-			    MFD_DEF_SIZE,
+			    mfd_def_size,
 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
 	mfd_assert_has_seals(fd, 0);
 	mfd_assert_add_seals(fd, F_SEAL_GROW);
@@ -709,8 +819,14 @@ static void test_seal_resize(void)
 {
 	int fd;
 
+	/* hugetlbfs does not contain sealing support */
+	if (hugetlbfs_test)
+		return;
+
+	printf("%s SEAL-RESIZE\n", MEMFD_STR);
+
 	fd = mfd_assert_new("kern_memfd_seal_resize",
-			    MFD_DEF_SIZE,
+			    mfd_def_size,
 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
 	mfd_assert_has_seals(fd, 0);
 	mfd_assert_add_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
@@ -726,15 +842,52 @@ static void test_seal_resize(void)
 }
 
 /*
+ * hugetlbfs does not support seals.  Basic test to dup the memfd created
+ * fd and perform some basic operations on it.
+ */
+static void hugetlbfs_dup(char *b_suffix)
+{
+	int fd, fd2;
+
+	printf("%s HUGETLBFS-DUP %s\n", MEMFD_STR, b_suffix);
+
+	fd = mfd_assert_new("kern_memfd_share_dup",
+			    mfd_def_size,
+			    MFD_CLOEXEC);
+
+	fd2 = mfd_assert_dup(fd);
+
+	mfd_assert_read(fd);
+	mfd_assert_write(fd);
+
+	mfd_assert_shrink(fd2);
+	mfd_assert_grow(fd2);
+
+	close(fd2);
+	close(fd);
+}
+
+/*
  * Test sharing via dup()
  * Test that seals are shared between dupped FDs and they're all equal.
  */
-static void test_share_dup(void)
+static void test_share_dup(char *banner, char *b_suffix)
 {
 	int fd, fd2;
 
+	/*
+	 * hugetlbfs does not contain sealing support.  Perform some
+	 * basic testing on dup'ed fd instead via hugetlbfs_dup.
+	 */
+	if (hugetlbfs_test) {
+		hugetlbfs_dup(b_suffix);
+		return;
+	}
+
+	printf("%s %s %s\n", MEMFD_STR, banner, b_suffix);
+
 	fd = mfd_assert_new("kern_memfd_share_dup",
-			    MFD_DEF_SIZE,
+			    mfd_def_size,
 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
 	mfd_assert_has_seals(fd, 0);
 
@@ -768,13 +921,19 @@ static void test_share_dup(void)
  * Test sealing with active mmap()s
  * Modifying seals is only allowed if no other mmap() refs exist.
  */
-static void test_share_mmap(void)
+static void test_share_mmap(char *banner, char *b_suffix)
 {
 	int fd;
 	void *p;
 
+	/* hugetlbfs does not contain sealing support */
+	if (hugetlbfs_test)
+		return;
+
+	printf("%s %s %s\n", MEMFD_STR,  banner, b_suffix);
+
 	fd = mfd_assert_new("kern_memfd_share_mmap",
-			    MFD_DEF_SIZE,
+			    mfd_def_size,
 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
 	mfd_assert_has_seals(fd, 0);
 
@@ -784,14 +943,40 @@ static void test_share_mmap(void)
 	mfd_assert_has_seals(fd, 0);
 	mfd_assert_add_seals(fd, F_SEAL_SHRINK);
 	mfd_assert_has_seals(fd, F_SEAL_SHRINK);
-	munmap(p, MFD_DEF_SIZE);
+	munmap(p, mfd_def_size);
 
 	/* readable ref allows sealing */
 	p = mfd_assert_mmap_private(fd);
 	mfd_assert_add_seals(fd, F_SEAL_WRITE);
 	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
-	munmap(p, MFD_DEF_SIZE);
+	munmap(p, mfd_def_size);
+
+	close(fd);
+}
+
+/*
+ * Basic test to make sure we can open the hugetlbfs fd via /proc and
+ * perform some simple operations on it.
+ */
+static void hugetlbfs_proc_open(char *b_suffix)
+{
+	int fd, fd2;
+
+	printf("%s HUGETLBFS-PROC-OPEN %s\n", MEMFD_STR, b_suffix);
 
+	fd = mfd_assert_new("kern_memfd_share_open",
+			    mfd_def_size,
+			    MFD_CLOEXEC);
+
+	fd2 = mfd_assert_open(fd, O_RDWR, 0);
+
+	mfd_assert_read(fd);
+	mfd_assert_write(fd);
+
+	mfd_assert_shrink(fd2);
+	mfd_assert_grow(fd2);
+
+	close(fd2);
 	close(fd);
 }
 
@@ -801,12 +986,23 @@ static void test_share_mmap(void)
  * This is *not* like dup(), but like a real separate open(). Make sure the
  * semantics are as expected and we correctly check for RDONLY / WRONLY / RDWR.
  */
-static void test_share_open(void)
+static void test_share_open(char *banner, char *b_suffix)
 {
 	int fd, fd2;
 
+	/*
+	 * hugetlbfs does not contain sealing support.  So test basic
+	 * functionality of using /proc fd via hugetlbfs_proc_open
+	 */
+	if (hugetlbfs_test) {
+		hugetlbfs_proc_open(b_suffix);
+		return;
+	}
+
+	printf("%s %s %s\n", MEMFD_STR, banner, b_suffix);
+
 	fd = mfd_assert_new("kern_memfd_share_open",
-			    MFD_DEF_SIZE,
+			    mfd_def_size,
 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
 	mfd_assert_has_seals(fd, 0);
 
@@ -841,13 +1037,19 @@ static void test_share_open(void)
  * Test sharing via fork()
  * Test whether seal-modifications work as expected with forked childs.
  */
-static void test_share_fork(void)
+static void test_share_fork(char *banner, char *b_suffix)
 {
 	int fd;
 	pid_t pid;
 
+	/* hugetlbfs does not contain sealing support */
+	if (hugetlbfs_test)
+		return;
+
+	printf("%s %s %s\n", MEMFD_STR, banner, b_suffix);
+
 	fd = mfd_assert_new("kern_memfd_share_fork",
-			    MFD_DEF_SIZE,
+			    mfd_def_size,
 			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
 	mfd_assert_has_seals(fd, 0);
 
@@ -870,40 +1072,40 @@ int main(int argc, char **argv)
 {
 	pid_t pid;
 
-	printf("memfd: CREATE\n");
+	if (argc == 2) {
+		if (!strcmp(argv[1], "hugetlbfs")) {
+			unsigned long hpage_size = default_huge_page_size();
+
+			if (!hpage_size) {
+				printf("Unable to determine huge page size\n");
+				abort();
+			}
+
+			hugetlbfs_test = 1;
+			mfd_def_size = hpage_size * 2;
+		}
+	}
+
 	test_create();
-	printf("memfd: BASIC\n");
 	test_basic();
 
-	printf("memfd: SEAL-WRITE\n");
 	test_seal_write();
-	printf("memfd: SEAL-SHRINK\n");
 	test_seal_shrink();
-	printf("memfd: SEAL-GROW\n");
 	test_seal_grow();
-	printf("memfd: SEAL-RESIZE\n");
 	test_seal_resize();
 
-	printf("memfd: SHARE-DUP\n");
-	test_share_dup();
-	printf("memfd: SHARE-MMAP\n");
-	test_share_mmap();
-	printf("memfd: SHARE-OPEN\n");
-	test_share_open();
-	printf("memfd: SHARE-FORK\n");
-	test_share_fork();
+	test_share_dup("SHARE-DUP", "");
+	test_share_mmap("SHARE-MMAP", "");
+	test_share_open("SHARE-OPEN", "");
+	test_share_fork("SHARE-FORK", "");
 
 	/* Run test-suite in a multi-threaded environment with a shared
 	 * file-table. */
 	pid = spawn_idle_thread(CLONE_FILES | CLONE_FS | CLONE_VM);
-	printf("memfd: SHARE-DUP (shared file-table)\n");
-	test_share_dup();
-	printf("memfd: SHARE-MMAP (shared file-table)\n");
-	test_share_mmap();
-	printf("memfd: SHARE-OPEN (shared file-table)\n");
-	test_share_open();
-	printf("memfd: SHARE-FORK (shared file-table)\n");
-	test_share_fork();
+	test_share_dup("SHARE-DUP", SHARED_FT_STR);
+	test_share_mmap("SHARE-MMAP", SHARED_FT_STR);
+	test_share_open("SHARE-OPEN", SHARED_FT_STR);
+	test_share_fork("SHARE-FORK", SHARED_FT_STR);
 	join_idle_thread(pid);
 
 	printf("memfd: DONE\n");
diff --git a/tools/testing/selftests/memfd/run_tests.sh b/tools/testing/selftests/memfd/run_tests.sh
new file mode 100755
index 000000000000..daabb350697c
--- /dev/null
+++ b/tools/testing/selftests/memfd/run_tests.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+# please run as root
+
+#
+# Normal tests requiring no special resources
+#
+./run_fuse_test.sh
+./memfd_test
+
+#
+# To test memfd_create with hugetlbfs, there needs to be hpages_test
+# huge pages free.  Attempt to allocate enough pages to test.
+#
+hpages_test=8
+
+#
+# Get count of free huge pages from /proc/meminfo
+#
+while read name size unit; do
+        if [ "$name" = "HugePages_Free:" ]; then
+                freepgs=$size
+        fi
+done < /proc/meminfo
+
+#
+# If not enough free huge pages for test, attempt to increase
+#
+if [ -n "$freepgs" ] && [ $freepgs -lt $hpages_test ]; then
+	nr_hugepgs=`cat /proc/sys/vm/nr_hugepages`
+	hpages_needed=`expr $hpages_test - $freepgs`
+
+	echo 3 > /proc/sys/vm/drop_caches
+	echo $(( $hpages_needed + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages
+	if [ $? -ne 0 ]; then
+		echo "Please run this test as root"
+		exit 1
+	fi
+	while read name size unit; do
+		if [ "$name" = "HugePages_Free:" ]; then
+			freepgs=$size
+		fi
+	done < /proc/meminfo
+fi
+
+#
+# If still not enough huge pages available, exit.  But, give back any huge
+# pages potentially allocated above.
+#
+if [ $freepgs -lt $hpages_test ]; then
+	# nr_hugepgs non-zero only if we attempted to increase
+	if [ -n "$nr_hugepgs" ]; then
+		echo $nr_hugepgs > /proc/sys/vm/nr_hugepages
+	fi
+	printf "Not enough huge pages available (%d < %d)\n" \
+		$freepgs $needpgs
+	exit 1
+fi
+
+#
+# Run the hugetlbfs test
+#
+./memfd_test hugetlbfs
+
+#
+# Give back any huge pages allocated for the test
+#
+if [ -n "$nr_hugepgs" ]; then
+	echo $nr_hugepgs > /proc/sys/vm/nr_hugepages
+fi
diff --git a/tools/testing/selftests/mqueue/Makefile b/tools/testing/selftests/mqueue/Makefile
index 79a664aeb8d7..152823b6cb21 100644
--- a/tools/testing/selftests/mqueue/Makefile
+++ b/tools/testing/selftests/mqueue/Makefile
@@ -5,8 +5,8 @@ TEST_GEN_PROGS := mq_open_tests mq_perf_tests
 include ../lib.mk
 
 override define RUN_TESTS
-	@./mq_open_tests /test1 || echo "selftests: mq_open_tests [FAIL]"
-	@./mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]"
+	@$(OUTPUT)/mq_open_tests /test1 || echo "selftests: mq_open_tests [FAIL]"
+	@$(OUTPUT)/mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]"
 endef
 
 override define EMIT_TESTS
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index afe109e5508a..c612d6e38c62 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -1,3 +1,4 @@
+msg_zerocopy
 socket
 psock_fanout
 psock_tpacket
@@ -5,3 +6,4 @@ reuseport_bpf
 reuseport_bpf_cpu
 reuseport_bpf_numa
 reuseport_dualstack
+reuseaddr_conflict
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index f6c9dbf478f8..d86bca991f45 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -3,11 +3,11 @@
 CFLAGS =  -Wall -Wl,--no-as-needed -O2 -g
 CFLAGS += -I../../../../usr/include/
 
-TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh
+TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
 TEST_GEN_FILES =  socket
-TEST_GEN_FILES += psock_fanout psock_tpacket
-TEST_GEN_FILES += reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
-TEST_GEN_FILES += reuseport_dualstack
+TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
+TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
+TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict
 
 include ../lib.mk
 
diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
new file mode 100644
index 000000000000..3ab6ec403905
--- /dev/null
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -0,0 +1,697 @@
+/* Evaluate MSG_ZEROCOPY
+ *
+ * Send traffic between two processes over one of the supported
+ * protocols and modes:
+ *
+ * PF_INET/PF_INET6
+ * - SOCK_STREAM
+ * - SOCK_DGRAM
+ * - SOCK_DGRAM with UDP_CORK
+ * - SOCK_RAW
+ * - SOCK_RAW with IP_HDRINCL
+ *
+ * PF_PACKET
+ * - SOCK_DGRAM
+ * - SOCK_RAW
+ *
+ * Start this program on two connected hosts, one in send mode and
+ * the other with option '-r' to put it in receiver mode.
+ *
+ * If zerocopy mode ('-z') is enabled, the sender will verify that
+ * the kernel queues completions on the error queue for all zerocopy
+ * transfers.
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/errqueue.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#ifndef SO_EE_ORIGIN_ZEROCOPY
+#define SO_EE_ORIGIN_ZEROCOPY		5
+#endif
+
+#ifndef SO_ZEROCOPY
+#define SO_ZEROCOPY	60
+#endif
+
+#ifndef SO_EE_CODE_ZEROCOPY_COPIED
+#define SO_EE_CODE_ZEROCOPY_COPIED	1
+#endif
+
+#ifndef MSG_ZEROCOPY
+#define MSG_ZEROCOPY	0x4000000
+#endif
+
+static int  cfg_cork;
+static bool cfg_cork_mixed;
+static int  cfg_cpu		= -1;		/* default: pin to last cpu */
+static int  cfg_family		= PF_UNSPEC;
+static int  cfg_ifindex		= 1;
+static int  cfg_payload_len;
+static int  cfg_port		= 8000;
+static bool cfg_rx;
+static int  cfg_runtime_ms	= 4200;
+static int  cfg_verbose;
+static int  cfg_waittime_ms	= 500;
+static bool cfg_zerocopy;
+
+static socklen_t cfg_alen;
+static struct sockaddr_storage cfg_dst_addr;
+static struct sockaddr_storage cfg_src_addr;
+
+static char payload[IP_MAXPACKET];
+static long packets, bytes, completions, expected_completions;
+static int  zerocopied = -1;
+static uint32_t next_completion;
+
+static unsigned long gettimeofday_ms(void)
+{
+	struct timeval tv;
+
+	gettimeofday(&tv, NULL);
+	return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static uint16_t get_ip_csum(const uint16_t *start, int num_words)
+{
+	unsigned long sum = 0;
+	int i;
+
+	for (i = 0; i < num_words; i++)
+		sum += start[i];
+
+	while (sum >> 16)
+		sum = (sum & 0xFFFF) + (sum >> 16);
+
+	return ~sum;
+}
+
+static int do_setcpu(int cpu)
+{
+	cpu_set_t mask;
+
+	CPU_ZERO(&mask);
+	CPU_SET(cpu, &mask);
+	if (sched_setaffinity(0, sizeof(mask), &mask))
+		error(1, 0, "setaffinity %d", cpu);
+
+	if (cfg_verbose)
+		fprintf(stderr, "cpu: %u\n", cpu);
+
+	return 0;
+}
+
+static void do_setsockopt(int fd, int level, int optname, int val)
+{
+	if (setsockopt(fd, level, optname, &val, sizeof(val)))
+		error(1, errno, "setsockopt %d.%d: %d", level, optname, val);
+}
+
+static int do_poll(int fd, int events)
+{
+	struct pollfd pfd;
+	int ret;
+
+	pfd.events = events;
+	pfd.revents = 0;
+	pfd.fd = fd;
+
+	ret = poll(&pfd, 1, cfg_waittime_ms);
+	if (ret == -1)
+		error(1, errno, "poll");
+
+	return ret && (pfd.revents & events);
+}
+
+static int do_accept(int fd)
+{
+	int fda = fd;
+
+	fd = accept(fda, NULL, NULL);
+	if (fd == -1)
+		error(1, errno, "accept");
+	if (close(fda))
+		error(1, errno, "close listen sock");
+
+	return fd;
+}
+
+static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy)
+{
+	int ret, len, i, flags;
+
+	len = 0;
+	for (i = 0; i < msg->msg_iovlen; i++)
+		len += msg->msg_iov[i].iov_len;
+
+	flags = MSG_DONTWAIT;
+	if (do_zerocopy)
+		flags |= MSG_ZEROCOPY;
+
+	ret = sendmsg(fd, msg, flags);
+	if (ret == -1 && errno == EAGAIN)
+		return false;
+	if (ret == -1)
+		error(1, errno, "send");
+	if (cfg_verbose && ret != len)
+		fprintf(stderr, "send: ret=%u != %u\n", ret, len);
+
+	if (len) {
+		packets++;
+		bytes += ret;
+		if (do_zerocopy && ret)
+			expected_completions++;
+	}
+
+	return true;
+}
+
+static void do_sendmsg_corked(int fd, struct msghdr *msg)
+{
+	bool do_zerocopy = cfg_zerocopy;
+	int i, payload_len, extra_len;
+
+	/* split up the packet. for non-multiple, make first buffer longer */
+	payload_len = cfg_payload_len / cfg_cork;
+	extra_len = cfg_payload_len - (cfg_cork * payload_len);
+
+	do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 1);
+
+	for (i = 0; i < cfg_cork; i++) {
+
+		/* in mixed-frags mode, alternate zerocopy and copy frags
+		 * start with non-zerocopy, to ensure attach later works
+		 */
+		if (cfg_cork_mixed)
+			do_zerocopy = (i & 1);
+
+		msg->msg_iov[0].iov_len = payload_len + extra_len;
+		extra_len = 0;
+
+		do_sendmsg(fd, msg, do_zerocopy);
+	}
+
+	do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0);
+}
+
+static int setup_iph(struct iphdr *iph, uint16_t payload_len)
+{
+	struct sockaddr_in *daddr = (void *) &cfg_dst_addr;
+	struct sockaddr_in *saddr = (void *) &cfg_src_addr;
+
+	memset(iph, 0, sizeof(*iph));
+
+	iph->version	= 4;
+	iph->tos	= 0;
+	iph->ihl	= 5;
+	iph->ttl	= 2;
+	iph->saddr	= saddr->sin_addr.s_addr;
+	iph->daddr	= daddr->sin_addr.s_addr;
+	iph->protocol	= IPPROTO_EGP;
+	iph->tot_len	= htons(sizeof(*iph) + payload_len);
+	iph->check	= get_ip_csum((void *) iph, iph->ihl << 1);
+
+	return sizeof(*iph);
+}
+
+static int setup_ip6h(struct ipv6hdr *ip6h, uint16_t payload_len)
+{
+	struct sockaddr_in6 *daddr = (void *) &cfg_dst_addr;
+	struct sockaddr_in6 *saddr = (void *) &cfg_src_addr;
+
+	memset(ip6h, 0, sizeof(*ip6h));
+
+	ip6h->version		= 6;
+	ip6h->payload_len	= htons(payload_len);
+	ip6h->nexthdr		= IPPROTO_EGP;
+	ip6h->hop_limit		= 2;
+	ip6h->saddr		= saddr->sin6_addr;
+	ip6h->daddr		= daddr->sin6_addr;
+
+	return sizeof(*ip6h);
+}
+
+static void setup_sockaddr(int domain, const char *str_addr, void *sockaddr)
+{
+	struct sockaddr_in6 *addr6 = (void *) sockaddr;
+	struct sockaddr_in *addr4 = (void *) sockaddr;
+
+	switch (domain) {
+	case PF_INET:
+		addr4->sin_family = AF_INET;
+		addr4->sin_port = htons(cfg_port);
+		if (inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
+			error(1, 0, "ipv4 parse error: %s", str_addr);
+		break;
+	case PF_INET6:
+		addr6->sin6_family = AF_INET6;
+		addr6->sin6_port = htons(cfg_port);
+		if (inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
+			error(1, 0, "ipv6 parse error: %s", str_addr);
+		break;
+	default:
+		error(1, 0, "illegal domain");
+	}
+}
+
+static int do_setup_tx(int domain, int type, int protocol)
+{
+	int fd;
+
+	fd = socket(domain, type, protocol);
+	if (fd == -1)
+		error(1, errno, "socket t");
+
+	do_setsockopt(fd, SOL_SOCKET, SO_SNDBUF, 1 << 21);
+	if (cfg_zerocopy)
+		do_setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, 1);
+
+	if (domain != PF_PACKET)
+		if (connect(fd, (void *) &cfg_dst_addr, cfg_alen))
+			error(1, errno, "connect");
+
+	return fd;
+}
+
+static bool do_recv_completion(int fd)
+{
+	struct sock_extended_err *serr;
+	struct msghdr msg = {};
+	struct cmsghdr *cm;
+	uint32_t hi, lo, range;
+	int ret, zerocopy;
+	char control[100];
+
+	msg.msg_control = control;
+	msg.msg_controllen = sizeof(control);
+
+	ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
+	if (ret == -1 && errno == EAGAIN)
+		return false;
+	if (ret == -1)
+		error(1, errno, "recvmsg notification");
+	if (msg.msg_flags & MSG_CTRUNC)
+		error(1, errno, "recvmsg notification: truncated");
+
+	cm = CMSG_FIRSTHDR(&msg);
+	if (!cm)
+		error(1, 0, "cmsg: no cmsg");
+	if (!((cm->cmsg_level == SOL_IP && cm->cmsg_type == IP_RECVERR) ||
+	      (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_RECVERR) ||
+	      (cm->cmsg_level == SOL_PACKET && cm->cmsg_type == PACKET_TX_TIMESTAMP)))
+		error(1, 0, "serr: wrong type: %d.%d",
+		      cm->cmsg_level, cm->cmsg_type);
+
+	serr = (void *) CMSG_DATA(cm);
+	if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
+		error(1, 0, "serr: wrong origin: %u", serr->ee_origin);
+	if (serr->ee_errno != 0)
+		error(1, 0, "serr: wrong error code: %u", serr->ee_errno);
+
+	hi = serr->ee_data;
+	lo = serr->ee_info;
+	range = hi - lo + 1;
+
+	/* Detect notification gaps. These should not happen often, if at all.
+	 * Gaps can occur due to drops, reordering and retransmissions.
+	 */
+	if (lo != next_completion)
+		fprintf(stderr, "gap: %u..%u does not append to %u\n",
+			lo, hi, next_completion);
+	next_completion = hi + 1;
+
+	zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED);
+	if (zerocopied == -1)
+		zerocopied = zerocopy;
+	else if (zerocopied != zerocopy) {
+		fprintf(stderr, "serr: inconsistent\n");
+		zerocopied = zerocopy;
+	}
+
+	if (cfg_verbose >= 2)
+		fprintf(stderr, "completed: %u (h=%u l=%u)\n",
+			range, hi, lo);
+
+	completions += range;
+	return true;
+}
+
+/* Read all outstanding messages on the errqueue */
+static void do_recv_completions(int fd)
+{
+	while (do_recv_completion(fd)) {}
+}
+
+/* Wait for all remaining completions on the errqueue */
+static void do_recv_remaining_completions(int fd)
+{
+	int64_t tstop = gettimeofday_ms() + cfg_waittime_ms;
+
+	while (completions < expected_completions &&
+	       gettimeofday_ms() < tstop) {
+		if (do_poll(fd, POLLERR))
+			do_recv_completions(fd);
+	}
+
+	if (completions < expected_completions)
+		fprintf(stderr, "missing notifications: %lu < %lu\n",
+			completions, expected_completions);
+}
+
+static void do_tx(int domain, int type, int protocol)
+{
+	struct iovec iov[3] = { {0} };
+	struct sockaddr_ll laddr;
+	struct msghdr msg = {0};
+	struct ethhdr eth;
+	union {
+		struct ipv6hdr ip6h;
+		struct iphdr iph;
+	} nh;
+	uint64_t tstop;
+	int fd;
+
+	fd = do_setup_tx(domain, type, protocol);
+
+	if (domain == PF_PACKET) {
+		uint16_t proto = cfg_family == PF_INET ? ETH_P_IP : ETH_P_IPV6;
+
+		/* sock_raw passes ll header as data */
+		if (type == SOCK_RAW) {
+			memset(eth.h_dest, 0x06, ETH_ALEN);
+			memset(eth.h_source, 0x02, ETH_ALEN);
+			eth.h_proto = htons(proto);
+			iov[0].iov_base = &eth;
+			iov[0].iov_len = sizeof(eth);
+			msg.msg_iovlen++;
+		}
+
+		/* both sock_raw and sock_dgram expect name */
+		memset(&laddr, 0, sizeof(laddr));
+		laddr.sll_family	= AF_PACKET;
+		laddr.sll_ifindex	= cfg_ifindex;
+		laddr.sll_protocol	= htons(proto);
+		laddr.sll_halen		= ETH_ALEN;
+
+		memset(laddr.sll_addr, 0x06, ETH_ALEN);
+
+		msg.msg_name		= &laddr;
+		msg.msg_namelen		= sizeof(laddr);
+	}
+
+	/* packet and raw sockets with hdrincl must pass network header */
+	if (domain == PF_PACKET || protocol == IPPROTO_RAW) {
+		if (cfg_family == PF_INET)
+			iov[1].iov_len = setup_iph(&nh.iph, cfg_payload_len);
+		else
+			iov[1].iov_len = setup_ip6h(&nh.ip6h, cfg_payload_len);
+
+		iov[1].iov_base = (void *) &nh;
+		msg.msg_iovlen++;
+	}
+
+	iov[2].iov_base = payload;
+	iov[2].iov_len = cfg_payload_len;
+	msg.msg_iovlen++;
+	msg.msg_iov = &iov[3 - msg.msg_iovlen];
+
+	tstop = gettimeofday_ms() + cfg_runtime_ms;
+	do {
+		if (cfg_cork)
+			do_sendmsg_corked(fd, &msg);
+		else
+			do_sendmsg(fd, &msg, cfg_zerocopy);
+
+		while (!do_poll(fd, POLLOUT)) {
+			if (cfg_zerocopy)
+				do_recv_completions(fd);
+		}
+
+	} while (gettimeofday_ms() < tstop);
+
+	if (cfg_zerocopy)
+		do_recv_remaining_completions(fd);
+
+	if (close(fd))
+		error(1, errno, "close");
+
+	fprintf(stderr, "tx=%lu (%lu MB) txc=%lu zc=%c\n",
+		packets, bytes >> 20, completions,
+		zerocopied == 1 ? 'y' : 'n');
+}
+
+static int do_setup_rx(int domain, int type, int protocol)
+{
+	int fd;
+
+	/* If tx over PF_PACKET, rx over PF_INET(6)/SOCK_RAW,
+	 * to recv the only copy of the packet, not a clone
+	 */
+	if (domain == PF_PACKET)
+		error(1, 0, "Use PF_INET/SOCK_RAW to read");
+
+	if (type == SOCK_RAW && protocol == IPPROTO_RAW)
+		error(1, 0, "IPPROTO_RAW: not supported on Rx");
+
+	fd = socket(domain, type, protocol);
+	if (fd == -1)
+		error(1, errno, "socket r");
+
+	do_setsockopt(fd, SOL_SOCKET, SO_RCVBUF, 1 << 21);
+	do_setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, 1 << 16);
+	do_setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, 1);
+
+	if (bind(fd, (void *) &cfg_dst_addr, cfg_alen))
+		error(1, errno, "bind");
+
+	if (type == SOCK_STREAM) {
+		if (listen(fd, 1))
+			error(1, errno, "listen");
+		fd = do_accept(fd);
+	}
+
+	return fd;
+}
+
+/* Flush all outstanding bytes for the tcp receive queue */
+static void do_flush_tcp(int fd)
+{
+	int ret;
+
+	/* MSG_TRUNC flushes up to len bytes */
+	ret = recv(fd, NULL, 1 << 21, MSG_TRUNC | MSG_DONTWAIT);
+	if (ret == -1 && errno == EAGAIN)
+		return;
+	if (ret == -1)
+		error(1, errno, "flush");
+	if (!ret)
+		return;
+
+	packets++;
+	bytes += ret;
+}
+
+/* Flush all outstanding datagrams. Verify first few bytes of each. */
+static void do_flush_datagram(int fd, int type)
+{
+	int ret, off = 0;
+	char buf[64];
+
+	/* MSG_TRUNC will return full datagram length */
+	ret = recv(fd, buf, sizeof(buf), MSG_DONTWAIT | MSG_TRUNC);
+	if (ret == -1 && errno == EAGAIN)
+		return;
+
+	/* raw ipv4 return with header, raw ipv6 without */
+	if (cfg_family == PF_INET && type == SOCK_RAW) {
+		off += sizeof(struct iphdr);
+		ret -= sizeof(struct iphdr);
+	}
+
+	if (ret == -1)
+		error(1, errno, "recv");
+	if (ret != cfg_payload_len)
+		error(1, 0, "recv: ret=%u != %u", ret, cfg_payload_len);
+	if (ret > sizeof(buf) - off)
+		ret = sizeof(buf) - off;
+	if (memcmp(buf + off, payload, ret))
+		error(1, 0, "recv: data mismatch");
+
+	packets++;
+	bytes += cfg_payload_len;
+}
+
+static void do_rx(int domain, int type, int protocol)
+{
+	uint64_t tstop;
+	int fd;
+
+	fd = do_setup_rx(domain, type, protocol);
+
+	tstop = gettimeofday_ms() + cfg_runtime_ms;
+	do {
+		if (type == SOCK_STREAM)
+			do_flush_tcp(fd);
+		else
+			do_flush_datagram(fd, type);
+
+		do_poll(fd, POLLIN);
+
+	} while (gettimeofday_ms() < tstop);
+
+	if (close(fd))
+		error(1, errno, "close");
+
+	fprintf(stderr, "rx=%lu (%lu MB)\n", packets, bytes >> 20);
+}
+
+static void do_test(int domain, int type, int protocol)
+{
+	int i;
+
+	if (cfg_cork && (domain == PF_PACKET || type != SOCK_DGRAM))
+		error(1, 0, "can only cork udp sockets");
+
+	do_setcpu(cfg_cpu);
+
+	for (i = 0; i < IP_MAXPACKET; i++)
+		payload[i] = 'a' + (i % 26);
+
+	if (cfg_rx)
+		do_rx(domain, type, protocol);
+	else
+		do_tx(domain, type, protocol);
+}
+
+static void usage(const char *filepath)
+{
+	error(1, 0, "Usage: %s [options] <test>", filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	const int max_payload_len = sizeof(payload) -
+				    sizeof(struct ipv6hdr) -
+				    sizeof(struct tcphdr) -
+				    40 /* max tcp options */;
+	int c;
+
+	cfg_payload_len = max_payload_len;
+
+	while ((c = getopt(argc, argv, "46c:C:D:i:mp:rs:S:t:vz")) != -1) {
+		switch (c) {
+		case '4':
+			if (cfg_family != PF_UNSPEC)
+				error(1, 0, "Pass one of -4 or -6");
+			cfg_family = PF_INET;
+			cfg_alen = sizeof(struct sockaddr_in);
+			break;
+		case '6':
+			if (cfg_family != PF_UNSPEC)
+				error(1, 0, "Pass one of -4 or -6");
+			cfg_family = PF_INET6;
+			cfg_alen = sizeof(struct sockaddr_in6);
+			break;
+		case 'c':
+			cfg_cork = strtol(optarg, NULL, 0);
+			break;
+		case 'C':
+			cfg_cpu = strtol(optarg, NULL, 0);
+			break;
+		case 'D':
+			setup_sockaddr(cfg_family, optarg, &cfg_dst_addr);
+			break;
+		case 'i':
+			cfg_ifindex = if_nametoindex(optarg);
+			if (cfg_ifindex == 0)
+				error(1, errno, "invalid iface: %s", optarg);
+			break;
+		case 'm':
+			cfg_cork_mixed = true;
+			break;
+		case 'p':
+			cfg_port = htons(strtoul(optarg, NULL, 0));
+			break;
+		case 'r':
+			cfg_rx = true;
+			break;
+		case 's':
+			cfg_payload_len = strtoul(optarg, NULL, 0);
+			break;
+		case 'S':
+			setup_sockaddr(cfg_family, optarg, &cfg_src_addr);
+			break;
+		case 't':
+			cfg_runtime_ms = 200 + strtoul(optarg, NULL, 10) * 1000;
+			break;
+		case 'v':
+			cfg_verbose++;
+			break;
+		case 'z':
+			cfg_zerocopy = true;
+			break;
+		}
+	}
+
+	if (cfg_payload_len > max_payload_len)
+		error(1, 0, "-s: payload exceeds max (%d)", max_payload_len);
+	if (cfg_cork_mixed && (!cfg_zerocopy || !cfg_cork))
+		error(1, 0, "-m: cork_mixed requires corking and zerocopy");
+
+	if (optind != argc - 1)
+		usage(argv[0]);
+}
+
+int main(int argc, char **argv)
+{
+	const char *cfg_test;
+
+	parse_opts(argc, argv);
+
+	cfg_test = argv[argc - 1];
+
+	if (!strcmp(cfg_test, "packet"))
+		do_test(PF_PACKET, SOCK_RAW, 0);
+	else if (!strcmp(cfg_test, "packet_dgram"))
+		do_test(PF_PACKET, SOCK_DGRAM, 0);
+	else if (!strcmp(cfg_test, "raw"))
+		do_test(cfg_family, SOCK_RAW, IPPROTO_EGP);
+	else if (!strcmp(cfg_test, "raw_hdrincl"))
+		do_test(cfg_family, SOCK_RAW, IPPROTO_RAW);
+	else if (!strcmp(cfg_test, "tcp"))
+		do_test(cfg_family, SOCK_STREAM, 0);
+	else if (!strcmp(cfg_test, "udp"))
+		do_test(cfg_family, SOCK_DGRAM, 0);
+	else
+		error(1, 0, "unknown cfg_test %s", cfg_test);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh
new file mode 100755
index 000000000000..d571d213418d
--- /dev/null
+++ b/tools/testing/selftests/net/msg_zerocopy.sh
@@ -0,0 +1,112 @@
+#!/bin/bash
+#
+# Send data between two processes across namespaces
+# Run twice: once without and once with zerocopy
+
+set -e
+
+readonly DEV="veth0"
+readonly DEV_MTU=65535
+readonly BIN="./msg_zerocopy"
+
+readonly RAND="$(mktemp -u XXXXXX)"
+readonly NSPREFIX="ns-${RAND}"
+readonly NS1="${NSPREFIX}1"
+readonly NS2="${NSPREFIX}2"
+
+readonly SADDR4='192.168.1.1'
+readonly DADDR4='192.168.1.2'
+readonly SADDR6='fd::1'
+readonly DADDR6='fd::2'
+
+readonly path_sysctl_mem="net.core.optmem_max"
+
+# Argument parsing
+if [[ "$#" -lt "2" ]]; then
+	echo "Usage: $0 [4|6] [tcp|udp|raw|raw_hdrincl|packet|packet_dgram] <args>"
+	exit 1
+fi
+
+readonly IP="$1"
+shift
+readonly TXMODE="$1"
+shift
+readonly EXTRA_ARGS="$@"
+
+# Argument parsing: configure addresses
+if [[ "${IP}" == "4" ]]; then
+	readonly SADDR="${SADDR4}"
+	readonly DADDR="${DADDR4}"
+elif [[ "${IP}" == "6" ]]; then
+	readonly SADDR="${SADDR6}"
+	readonly DADDR="${DADDR6}"
+else
+	echo "Invalid IP version ${IP}"
+	exit 1
+fi
+
+# Argument parsing: select receive mode
+#
+# This differs from send mode for
+# - packet:	use raw recv, because packet receives skb clones
+# - raw_hdrinc: use raw recv, because hdrincl is a tx-only option
+case "${TXMODE}" in
+'packet' | 'packet_dgram' | 'raw_hdrincl')
+	RXMODE='raw'
+	;;
+*)
+	RXMODE="${TXMODE}"
+	;;
+esac
+
+# Start of state changes: install cleanup handler
+save_sysctl_mem="$(sysctl -n ${path_sysctl_mem})"
+
+cleanup() {
+	ip netns del "${NS2}"
+	ip netns del "${NS1}"
+	sysctl -w -q "${path_sysctl_mem}=${save_sysctl_mem}"
+}
+
+trap cleanup EXIT
+
+# Configure system settings
+sysctl -w -q "${path_sysctl_mem}=1000000"
+
+# Create virtual ethernet pair between network namespaces
+ip netns add "${NS1}"
+ip netns add "${NS2}"
+
+ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \
+  peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}"
+
+# Bring the devices up
+ip -netns "${NS1}" link set "${DEV}" up
+ip -netns "${NS2}" link set "${DEV}" up
+
+# Set fixed MAC addresses on the devices
+ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02
+ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06
+
+# Add fixed IP addresses to the devices
+ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}"
+ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}"
+ip -netns "${NS1}" addr add       fd::1/64 dev "${DEV}" nodad
+ip -netns "${NS2}" addr add       fd::2/64 dev "${DEV}" nodad
+
+# Optionally disable sg or csum offload to test edge cases
+# ip netns exec "${NS1}" ethtool -K "${DEV}" sg off
+
+do_test() {
+	local readonly ARGS="$1"
+
+	echo "ipv${IP} ${TXMODE} ${ARGS}"
+	ip netns exec "${NS2}" "${BIN}" "-${IP}" -i "${DEV}" -t 2 -C 2 -S "${SADDR}" -D "${DADDR}" ${ARGS} -r "${RXMODE}" &
+	sleep 0.2
+	ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 -S "${SADDR}" -D "${DADDR}" ${ARGS} "${TXMODE}"
+	wait
+}
+
+do_test "${EXTRA_ARGS}"
+do_test "-z ${EXTRA_ARGS}"
+echo ok
diff --git a/tools/testing/selftests/net/netdevice.sh b/tools/testing/selftests/net/netdevice.sh
index 4e00568d70c2..90cb903c3381 100755
--- a/tools/testing/selftests/net/netdevice.sh
+++ b/tools/testing/selftests/net/netdevice.sh
@@ -178,7 +178,7 @@ if [ "$(id -u)" -ne 0 ];then
 	exit 0
 fi
 
-ip -Version 2>/dev/null >/dev/null
+ip link show 2>/dev/null >/dev/null
 if [ $? -ne 0 ];then
 	echo "SKIP: Could not run test without the ip tool"
 	exit 0
diff --git a/tools/testing/selftests/net/reuseaddr_conflict.c b/tools/testing/selftests/net/reuseaddr_conflict.c
new file mode 100644
index 000000000000..7c5b12664b03
--- /dev/null
+++ b/tools/testing/selftests/net/reuseaddr_conflict.c
@@ -0,0 +1,114 @@
+/*
+ * Test for the regression introduced by
+ *
+ * b9470c27607b ("inet: kill smallest_size and smallest_port")
+ *
+ * If we open an ipv4 socket on a port with reuseaddr we shouldn't reset the tb
+ * when we open the ipv6 conterpart, which is what was happening previously.
+ */
+#include <errno.h>
+#include <error.h>
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#define PORT 9999
+
+int open_port(int ipv6, int any)
+{
+	int fd = -1;
+	int reuseaddr = 1;
+	int v6only = 1;
+	int addrlen;
+	int ret = -1;
+	struct sockaddr *addr;
+	int family = ipv6 ? AF_INET6 : AF_INET;
+
+	struct sockaddr_in6 addr6 = {
+		.sin6_family = AF_INET6,
+		.sin6_port = htons(PORT),
+		.sin6_addr = in6addr_any
+	};
+	struct sockaddr_in addr4 = {
+		.sin_family = AF_INET,
+		.sin_port = htons(PORT),
+		.sin_addr.s_addr = any ? htonl(INADDR_ANY) : inet_addr("127.0.0.1"),
+	};
+
+
+	if (ipv6) {
+		addr = (struct sockaddr*)&addr6;
+		addrlen = sizeof(addr6);
+	} else {
+		addr = (struct sockaddr*)&addr4;
+		addrlen = sizeof(addr4);
+	}
+
+	if ((fd = socket(family, SOCK_STREAM, IPPROTO_TCP)) < 0) {
+		perror("socket");
+		goto out;
+	}
+
+	if (ipv6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, (void*)&v6only,
+			       sizeof(v6only)) < 0) {
+		perror("setsockopt IPV6_V6ONLY");
+		goto out;
+	}
+
+	if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &reuseaddr,
+		       sizeof(reuseaddr)) < 0) {
+		perror("setsockopt SO_REUSEADDR");
+		goto out;
+	}
+
+	if (bind(fd, addr, addrlen) < 0) {
+		perror("bind");
+		goto out;
+	}
+
+	if (any)
+		return fd;
+
+	if (listen(fd, 1) < 0) {
+		perror("listen");
+		goto out;
+	}
+	return fd;
+out:
+	close(fd);
+	return ret;
+}
+
+int main(void)
+{
+	int listenfd;
+	int fd1, fd2;
+
+	fprintf(stderr, "Opening 127.0.0.1:%d\n", PORT);
+	listenfd = open_port(0, 0);
+	if (listenfd < 0)
+		error(1, errno, "Couldn't open listen socket");
+	fprintf(stderr, "Opening INADDR_ANY:%d\n", PORT);
+	fd1 = open_port(0, 1);
+	if (fd1 >= 0)
+		error(1, 0, "Was allowed to create an ipv4 reuseport on a already bound non-reuseport socket");
+	fprintf(stderr, "Opening in6addr_any:%d\n", PORT);
+	fd1 = open_port(1, 1);
+	if (fd1 < 0)
+		error(1, errno, "Couldn't open ipv6 reuseport");
+	fprintf(stderr, "Opening INADDR_ANY:%d\n", PORT);
+	fd2 = open_port(0, 1);
+	if (fd2 >= 0)
+		error(1, 0, "Was allowed to create an ipv4 reuseport on a already bound non-reuseport socket");
+	close(fd1);
+	fprintf(stderr, "Opening INADDR_ANY:%d after closing ipv6 socket\n", PORT);
+	fd1 = open_port(0, 1);
+	if (fd1 >= 0)
+		error(1, 0, "Was allowed to create an ipv4 reuseport on an already bound non-reuseport socket with no ipv6");
+	fprintf(stderr, "Success");
+	return 0;
+}
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
new file mode 100755
index 000000000000..57b5ff576240
--- /dev/null
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -0,0 +1,272 @@
+#!/bin/sh
+#
+# This test is for checking rtnetlink callpaths, and get as much coverage as possible.
+#
+# set -e
+
+devdummy="test-dummy0"
+ret=0
+
+# set global exit status, but never reset nonzero one.
+check_err()
+{
+	if [ $ret -eq 0 ]; then
+		ret=$1
+	fi
+}
+
+kci_add_dummy()
+{
+	ip link add name "$devdummy" type dummy
+	check_err $?
+	ip link set "$devdummy" up
+	check_err $?
+}
+
+kci_del_dummy()
+{
+	ip link del dev "$devdummy"
+	check_err $?
+}
+
+# add a bridge with vlans on top
+kci_test_bridge()
+{
+	devbr="test-br0"
+	vlandev="testbr-vlan1"
+
+	ret=0
+	ip link add name "$devbr" type bridge
+	check_err $?
+
+	ip link set dev "$devdummy" master "$devbr"
+	check_err $?
+
+	ip link set "$devbr" up
+	check_err $?
+
+	ip link add link "$devbr" name "$vlandev" type vlan id 1
+	check_err $?
+	ip addr add dev "$vlandev" 10.200.7.23/30
+	check_err $?
+	ip -6 addr add dev "$vlandev" dead:42::1234/64
+	check_err $?
+	ip -d link > /dev/null
+	check_err $?
+	ip r s t all > /dev/null
+	check_err $?
+	ip -6 addr del dev "$vlandev" dead:42::1234/64
+	check_err $?
+
+	ip link del dev "$vlandev"
+	check_err $?
+	ip link del dev "$devbr"
+	check_err $?
+
+	if [ $ret -ne 0 ];then
+		echo "FAIL: bridge setup"
+		return 1
+	fi
+	echo "PASS: bridge setup"
+
+}
+
+kci_test_gre()
+{
+	gredev=neta
+	rem=10.42.42.1
+	loc=10.0.0.1
+
+	ret=0
+	ip tunnel add $gredev mode gre remote $rem local $loc ttl 1
+	check_err $?
+	ip link set $gredev up
+	check_err $?
+	ip addr add 10.23.7.10 dev $gredev
+	check_err $?
+	ip route add 10.23.8.0/30 dev $gredev
+	check_err $?
+	ip addr add dev "$devdummy" 10.23.7.11/24
+	check_err $?
+	ip link > /dev/null
+	check_err $?
+	ip addr > /dev/null
+	check_err $?
+	ip addr del dev "$devdummy" 10.23.7.11/24
+	check_err $?
+
+	ip link del $gredev
+	check_err $?
+
+	if [ $ret -ne 0 ];then
+		echo "FAIL: gre tunnel endpoint"
+		return 1
+	fi
+	echo "PASS: gre tunnel endpoint"
+}
+
+# tc uses rtnetlink too, for full tc testing
+# please see tools/testing/selftests/tc-testing.
+kci_test_tc()
+{
+	dev=lo
+	ret=0
+
+	tc qdisc add dev "$dev" root handle 1: htb
+	check_err $?
+	tc class add dev "$dev" parent 1: classid 1:10 htb rate 1mbit
+	check_err $?
+	tc filter add dev "$dev" parent 1:0 prio 5 handle ffe: protocol ip u32 divisor 256
+	check_err $?
+	tc filter add dev "$dev" parent 1:0 prio 5 handle ffd: protocol ip u32 divisor 256
+	check_err $?
+	tc filter add dev "$dev" parent 1:0 prio 5 handle ffc: protocol ip u32 divisor 256
+	check_err $?
+	tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32 ht ffe:2: match ip src 10.0.0.3 flowid 1:10
+	check_err $?
+	tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:2 u32 ht ffe:2: match ip src 10.0.0.2 flowid 1:10
+	check_err $?
+	tc filter show dev "$dev" parent  1:0 > /dev/null
+	check_err $?
+	tc filter del dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32
+	check_err $?
+	tc filter show dev "$dev" parent  1:0 > /dev/null
+	check_err $?
+	tc qdisc del dev "$dev" root handle 1: htb
+	check_err $?
+
+	if [ $ret -ne 0 ];then
+		echo "FAIL: tc htb hierarchy"
+		return 1
+	fi
+	echo "PASS: tc htb hierarchy"
+
+}
+
+kci_test_polrouting()
+{
+	ret=0
+	ip rule add fwmark 1 lookup 100
+	check_err $?
+	ip route add local 0.0.0.0/0 dev lo table 100
+	check_err $?
+	ip r s t all > /dev/null
+	check_err $?
+	ip rule del fwmark 1 lookup 100
+	check_err $?
+	ip route del local 0.0.0.0/0 dev lo table 100
+	check_err $?
+
+	if [ $ret -ne 0 ];then
+		echo "FAIL: policy route test"
+		return 1
+	fi
+	echo "PASS: policy routing"
+}
+
+kci_test_route_get()
+{
+	ret=0
+
+	ip route get 127.0.0.1 > /dev/null
+	check_err $?
+	ip route get 127.0.0.1 dev "$devdummy" > /dev/null
+	check_err $?
+	ip route get ::1 > /dev/null
+	check_err $?
+	ip route get fe80::1 dev "$devdummy" > /dev/null
+	check_err $?
+	ip route get 127.0.0.1 from 127.0.0.1 oif lo tos 0x1 mark 0x1 > /dev/null
+	check_err $?
+	ip route get ::1 from ::1 iif lo oif lo tos 0x1 mark 0x1 > /dev/null
+	check_err $?
+	ip addr add dev "$devdummy" 10.23.7.11/24
+	check_err $?
+	ip route get 10.23.7.11 from 10.23.7.12 iif "$devdummy" > /dev/null
+	check_err $?
+	ip addr del dev "$devdummy" 10.23.7.11/24
+	check_err $?
+
+	if [ $ret -ne 0 ];then
+		echo "FAIL: route get"
+		return 1
+	fi
+
+	echo "PASS: route get"
+}
+
+kci_test_addrlabel()
+{
+	ret=0
+
+	ip addrlabel add prefix dead::/64 dev lo label 1
+	check_err $?
+
+	ip addrlabel list |grep -q "prefix dead::/64 dev lo label 1"
+	check_err $?
+
+	ip addrlabel del prefix dead::/64 dev lo label 1 2> /dev/null
+	check_err $?
+
+	ip addrlabel add prefix dead::/64 label 1 2> /dev/null
+	check_err $?
+
+	ip addrlabel del prefix dead::/64 label 1 2> /dev/null
+	check_err $?
+
+	# concurrent add/delete
+	for i in $(seq 1 1000); do
+		ip addrlabel add prefix 1c3::/64 label 12345 2>/dev/null
+	done &
+
+	for i in $(seq 1 1000); do
+		ip addrlabel del prefix 1c3::/64 label 12345 2>/dev/null
+	done
+
+	wait
+
+	ip addrlabel del prefix 1c3::/64 label 12345 2>/dev/null
+
+	if [ $ret -ne 0 ];then
+		echo "FAIL: ipv6 addrlabel"
+		return 1
+	fi
+
+	echo "PASS: ipv6 addrlabel"
+}
+
+kci_test_rtnl()
+{
+	kci_add_dummy
+	if [ $ret -ne 0 ];then
+		echo "FAIL: cannot add dummy interface"
+		return 1
+	fi
+
+	kci_test_polrouting
+	kci_test_route_get
+	kci_test_tc
+	kci_test_gre
+	kci_test_bridge
+	kci_test_addrlabel
+
+	kci_del_dummy
+}
+
+#check for needed privileges
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit 0
+fi
+
+for x in ip tc;do
+	$x -Version 2>/dev/null >/dev/null
+	if [ $? -ne 0 ];then
+		echo "SKIP: Could not run test without the $x tool"
+		exit 0
+	fi
+done
+
+kci_test_rtnl
+
+exit $ret
diff --git a/tools/testing/selftests/networking/timestamping/.gitignore b/tools/testing/selftests/networking/timestamping/.gitignore
index 9e69e982fb38..d9355035e746 100644
--- a/tools/testing/selftests/networking/timestamping/.gitignore
+++ b/tools/testing/selftests/networking/timestamping/.gitignore
@@ -1,3 +1,4 @@
 timestamping
+rxtimestamp
 txtimestamp
 hwtstamp_config
diff --git a/tools/testing/selftests/networking/timestamping/Makefile b/tools/testing/selftests/networking/timestamping/Makefile
index ccbb9edbbbb9..92fb8ee917c5 100644
--- a/tools/testing/selftests/networking/timestamping/Makefile
+++ b/tools/testing/selftests/networking/timestamping/Makefile
@@ -1,4 +1,6 @@
-TEST_PROGS := hwtstamp_config timestamping txtimestamp
+CFLAGS += -I../../../../../usr/include
+
+TEST_PROGS := hwtstamp_config rxtimestamp timestamping txtimestamp
 
 all: $(TEST_PROGS)
 
diff --git a/tools/testing/selftests/networking/timestamping/rxtimestamp.c b/tools/testing/selftests/networking/timestamping/rxtimestamp.c
new file mode 100644
index 000000000000..dd4162fc0419
--- /dev/null
+++ b/tools/testing/selftests/networking/timestamping/rxtimestamp.c
@@ -0,0 +1,389 @@
+#include <errno.h>
+#include <error.h>
+#include <getopt.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/time.h>
+#include <sys/socket.h>
+#include <sys/select.h>
+#include <sys/ioctl.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+
+#include <asm/types.h>
+#include <linux/net_tstamp.h>
+#include <linux/errqueue.h>
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+struct options {
+	int so_timestamp;
+	int so_timestampns;
+	int so_timestamping;
+};
+
+struct tstamps {
+	bool tstamp;
+	bool tstampns;
+	bool swtstamp;
+	bool hwtstamp;
+};
+
+struct socket_type {
+	char *friendly_name;
+	int type;
+	int protocol;
+	bool enabled;
+};
+
+struct test_case {
+	struct options sockopt;
+	struct tstamps expected;
+	bool enabled;
+};
+
+struct sof_flag {
+	int mask;
+	char *name;
+};
+
+static struct sof_flag sof_flags[] = {
+#define SOF_FLAG(f) { f, #f }
+	SOF_FLAG(SOF_TIMESTAMPING_SOFTWARE),
+	SOF_FLAG(SOF_TIMESTAMPING_RX_SOFTWARE),
+	SOF_FLAG(SOF_TIMESTAMPING_RX_HARDWARE),
+};
+
+static struct socket_type socket_types[] = {
+	{ "ip",		SOCK_RAW,	IPPROTO_EGP },
+	{ "udp",	SOCK_DGRAM,	IPPROTO_UDP },
+	{ "tcp",	SOCK_STREAM,	IPPROTO_TCP },
+};
+
+static struct test_case test_cases[] = {
+	{ {}, {} },
+	{
+		{ so_timestamp: 1 },
+		{ tstamp: true }
+	},
+	{
+		{ so_timestampns: 1 },
+		{ tstampns: true }
+	},
+	{
+		{ so_timestamp: 1, so_timestampns: 1 },
+		{ tstampns: true }
+	},
+	{
+		{ so_timestamping: SOF_TIMESTAMPING_RX_SOFTWARE },
+		{}
+	},
+	{
+		/* Loopback device does not support hw timestamps. */
+		{ so_timestamping: SOF_TIMESTAMPING_RX_HARDWARE },
+		{}
+	},
+	{
+		{ so_timestamping: SOF_TIMESTAMPING_SOFTWARE },
+		{}
+	},
+	{
+		{ so_timestamping: SOF_TIMESTAMPING_RX_SOFTWARE
+			| SOF_TIMESTAMPING_RX_HARDWARE },
+		{}
+	},
+	{
+		{ so_timestamping: SOF_TIMESTAMPING_SOFTWARE
+			| SOF_TIMESTAMPING_RX_SOFTWARE },
+		{ swtstamp: true }
+	},
+	{
+		{ so_timestamp: 1, so_timestamping: SOF_TIMESTAMPING_SOFTWARE
+			| SOF_TIMESTAMPING_RX_SOFTWARE },
+		{ tstamp: true, swtstamp: true }
+	},
+};
+
+static struct option long_options[] = {
+	{ "list_tests", no_argument, 0, 'l' },
+	{ "test_num", required_argument, 0, 'n' },
+	{ "op_size", required_argument, 0, 's' },
+	{ "tcp", no_argument, 0, 't' },
+	{ "udp", no_argument, 0, 'u' },
+	{ "ip", no_argument, 0, 'i' },
+};
+
+static int next_port = 19999;
+static int op_size = 10 * 1024;
+
+void print_test_case(struct test_case *t)
+{
+	int f = 0;
+
+	printf("sockopts {");
+	if (t->sockopt.so_timestamp)
+		printf(" SO_TIMESTAMP ");
+	if (t->sockopt.so_timestampns)
+		printf(" SO_TIMESTAMPNS ");
+	if (t->sockopt.so_timestamping) {
+		printf(" SO_TIMESTAMPING: {");
+		for (f = 0; f < ARRAY_SIZE(sof_flags); f++)
+			if (t->sockopt.so_timestamping & sof_flags[f].mask)
+				printf(" %s |", sof_flags[f].name);
+		printf("}");
+	}
+	printf("} expected cmsgs: {");
+	if (t->expected.tstamp)
+		printf(" SCM_TIMESTAMP ");
+	if (t->expected.tstampns)
+		printf(" SCM_TIMESTAMPNS ");
+	if (t->expected.swtstamp || t->expected.hwtstamp) {
+		printf(" SCM_TIMESTAMPING {");
+		if (t->expected.swtstamp)
+			printf("0");
+		if (t->expected.swtstamp && t->expected.hwtstamp)
+			printf(",");
+		if (t->expected.hwtstamp)
+			printf("2");
+		printf("}");
+	}
+	printf("}\n");
+}
+
+void do_send(int src)
+{
+	int r;
+	char *buf = malloc(op_size);
+
+	memset(buf, 'z', op_size);
+	r = write(src, buf, op_size);
+	if (r < 0)
+		error(1, errno, "Failed to sendmsg");
+
+	free(buf);
+}
+
+bool do_recv(int rcv, int read_size, struct tstamps expected)
+{
+	const int CMSG_SIZE = 1024;
+
+	struct scm_timestamping *ts;
+	struct tstamps actual = {};
+	char cmsg_buf[CMSG_SIZE];
+	struct iovec recv_iov;
+	struct cmsghdr *cmsg;
+	bool failed = false;
+	struct msghdr hdr;
+	int flags = 0;
+	int r;
+
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.msg_iov = &recv_iov;
+	hdr.msg_iovlen = 1;
+	recv_iov.iov_base = malloc(read_size);
+	recv_iov.iov_len = read_size;
+
+	hdr.msg_control = cmsg_buf;
+	hdr.msg_controllen = sizeof(cmsg_buf);
+
+	r = recvmsg(rcv, &hdr, flags);
+	if (r < 0)
+		error(1, errno, "Failed to recvmsg");
+	if (r != read_size)
+		error(1, 0, "Only received %d bytes of payload.", r);
+
+	if (hdr.msg_flags & (MSG_TRUNC | MSG_CTRUNC))
+		error(1, 0, "Message was truncated.");
+
+	for (cmsg = CMSG_FIRSTHDR(&hdr); cmsg != NULL;
+	     cmsg = CMSG_NXTHDR(&hdr, cmsg)) {
+		if (cmsg->cmsg_level != SOL_SOCKET)
+			error(1, 0, "Unexpected cmsg_level %d",
+			      cmsg->cmsg_level);
+		switch (cmsg->cmsg_type) {
+		case SCM_TIMESTAMP:
+			actual.tstamp = true;
+			break;
+		case SCM_TIMESTAMPNS:
+			actual.tstampns = true;
+			break;
+		case SCM_TIMESTAMPING:
+			ts = (struct scm_timestamping *)CMSG_DATA(cmsg);
+			actual.swtstamp = !!ts->ts[0].tv_sec;
+			if (ts->ts[1].tv_sec != 0)
+				error(0, 0, "ts[1] should not be set.");
+			actual.hwtstamp = !!ts->ts[2].tv_sec;
+			break;
+		default:
+			error(1, 0, "Unexpected cmsg_type %d", cmsg->cmsg_type);
+		}
+	}
+
+#define VALIDATE(field) \
+	do { \
+		if (expected.field != actual.field) { \
+			if (expected.field) \
+				error(0, 0, "Expected " #field " to be set."); \
+			else \
+				error(0, 0, \
+				      "Expected " #field " to not be set."); \
+			failed = true; \
+		} \
+	} while (0)
+
+	VALIDATE(tstamp);
+	VALIDATE(tstampns);
+	VALIDATE(swtstamp);
+	VALIDATE(hwtstamp);
+#undef VALIDATE
+
+	free(recv_iov.iov_base);
+
+	return failed;
+}
+
+void config_so_flags(int rcv, struct options o)
+{
+	int on = 1;
+
+	if (setsockopt(rcv, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) < 0)
+		error(1, errno, "Failed to enable SO_REUSEADDR");
+
+	if (o.so_timestamp &&
+	    setsockopt(rcv, SOL_SOCKET, SO_TIMESTAMP,
+		       &o.so_timestamp, sizeof(o.so_timestamp)) < 0)
+		error(1, errno, "Failed to enable SO_TIMESTAMP");
+
+	if (o.so_timestampns &&
+	    setsockopt(rcv, SOL_SOCKET, SO_TIMESTAMPNS,
+		       &o.so_timestampns, sizeof(o.so_timestampns)) < 0)
+		error(1, errno, "Failed to enable SO_TIMESTAMPNS");
+
+	if (o.so_timestamping &&
+	    setsockopt(rcv, SOL_SOCKET, SO_TIMESTAMPING,
+		       &o.so_timestamping, sizeof(o.so_timestamping)) < 0)
+		error(1, errno, "Failed to set SO_TIMESTAMPING");
+}
+
+bool run_test_case(struct socket_type s, struct test_case t)
+{
+	int port = (s.type == SOCK_RAW) ? 0 : next_port++;
+	int read_size = op_size;
+	struct sockaddr_in addr;
+	bool failed = false;
+	int src, dst, rcv;
+
+	src = socket(AF_INET, s.type, s.protocol);
+	if (src < 0)
+		error(1, errno, "Failed to open src socket");
+
+	dst = socket(AF_INET, s.type, s.protocol);
+	if (dst < 0)
+		error(1, errno, "Failed to open dst socket");
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sin_family = AF_INET;
+	addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+	addr.sin_port = htons(port);
+
+	if (bind(dst, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+		error(1, errno, "Failed to bind to port %d", port);
+
+	if (s.type == SOCK_STREAM && (listen(dst, 1) < 0))
+		error(1, errno, "Failed to listen");
+
+	if (connect(src, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+		error(1, errno, "Failed to connect");
+
+	if (s.type == SOCK_STREAM) {
+		rcv = accept(dst, NULL, NULL);
+		if (rcv < 0)
+			error(1, errno, "Failed to accept");
+		close(dst);
+	} else {
+		rcv = dst;
+	}
+
+	config_so_flags(rcv, t.sockopt);
+	usleep(20000); /* setsockopt for SO_TIMESTAMPING is asynchronous */
+	do_send(src);
+
+	if (s.type == SOCK_RAW)
+		read_size += 20;  /* for IP header */
+	failed = do_recv(rcv, read_size, t.expected);
+
+	close(rcv);
+	close(src);
+
+	return failed;
+}
+
+int main(int argc, char **argv)
+{
+	bool all_protocols = true;
+	bool all_tests = true;
+	int arg_index = 0;
+	int failures = 0;
+	int s, t;
+	char opt;
+
+	while ((opt = getopt_long(argc, argv, "", long_options,
+				  &arg_index)) != -1) {
+		switch (opt) {
+		case 'l':
+			for (t = 0; t < ARRAY_SIZE(test_cases); t++) {
+				printf("%d\t", t);
+				print_test_case(&test_cases[t]);
+			}
+			return 0;
+		case 'n':
+			t = atoi(optarg);
+			if (t >= ARRAY_SIZE(test_cases))
+				error(1, 0, "Invalid test case: %d", t);
+			all_tests = false;
+			test_cases[t].enabled = true;
+			break;
+		case 's':
+			op_size = atoi(optarg);
+			break;
+		case 't':
+			all_protocols = false;
+			socket_types[2].enabled = true;
+			break;
+		case 'u':
+			all_protocols = false;
+			socket_types[1].enabled = true;
+			break;
+		case 'i':
+			all_protocols = false;
+			socket_types[0].enabled = true;
+			break;
+		default:
+			error(1, 0, "Failed to parse parameters.");
+		}
+	}
+
+	for (s = 0; s < ARRAY_SIZE(socket_types); s++) {
+		if (!all_protocols && !socket_types[s].enabled)
+			continue;
+
+		printf("Testing %s...\n", socket_types[s].friendly_name);
+		for (t = 0; t < ARRAY_SIZE(test_cases); t++) {
+			if (!all_tests && !test_cases[t].enabled)
+				continue;
+
+			printf("Starting testcase %d...\n", t);
+			if (run_test_case(socket_types[s], test_cases[t])) {
+				failures++;
+				printf("FAILURE in test case ");
+				print_test_case(&test_cases[t]);
+			}
+		}
+	}
+	if (!failures)
+		printf("PASSED.\n");
+	return failures;
+}
diff --git a/tools/testing/selftests/nsfs/config b/tools/testing/selftests/nsfs/config
new file mode 100644
index 000000000000..598d0a225fc9
--- /dev/null
+++ b/tools/testing/selftests/nsfs/config
@@ -0,0 +1,3 @@
+CONFIG_USER_NS=y
+CONFIG_UTS_NS=y
+CONFIG_PID_NS=y
diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile
index fe6bc60dfc60..8932263e5a74 100644
--- a/tools/testing/selftests/powerpc/ptrace/Makefile
+++ b/tools/testing/selftests/powerpc/ptrace/Makefile
@@ -6,7 +6,7 @@ include ../../lib.mk
 
 all: $(TEST_PROGS)
 
-CFLAGS += -m64 -I../../../../../usr/include -I../tm -mhtm
+CFLAGS += -m64 -I../../../../../usr/include -I../tm -mhtm -fno-pie
 
 $(TEST_PROGS): ../harness.c ../utils.c ../lib/reg.S ptrace.h
 
diff --git a/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S b/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S
index ef7c971abb67..bceb53f57573 100644
--- a/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S
+++ b/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S
@@ -8,7 +8,7 @@ message:
 	.section ".toc"
 	.balign 8
 pattern:
-	.llong 0x5555AAAA5555AAAA
+	.8byte 0x5555AAAA5555AAAA
 
 	.text
 FUNC_START(_start)
diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile
index 958c11c14acd..7bfcd454fb2a 100644
--- a/tools/testing/selftests/powerpc/tm/Makefile
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -15,6 +15,7 @@ $(OUTPUT)/tm-syscall: tm-syscall-asm.S
 $(OUTPUT)/tm-syscall: CFLAGS += -I../../../../../usr/include
 $(OUTPUT)/tm-tmspr: CFLAGS += -pthread
 $(OUTPUT)/tm-vmx-unavail: CFLAGS += -pthread -m64
+$(OUTPUT)/tm-resched-dscr: ../pmu/lib.o
 
 SIGNAL_CONTEXT_CHK_TESTS := $(patsubst %,$(OUTPUT)/%,$(SIGNAL_CONTEXT_CHK_TESTS))
 $(SIGNAL_CONTEXT_CHK_TESTS): tm-signal.S
diff --git a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c
index e79ccd6aada1..a7ac2e4c60d9 100644
--- a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c
+++ b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c
@@ -30,6 +30,7 @@
 
 #include "utils.h"
 #include "tm.h"
+#include "../pmu/lib.h"
 
 #define SPRN_DSCR       0x03
 
@@ -75,8 +76,6 @@ int test_body(void)
 		);
 		assert(rv); /* make sure the transaction aborted */
 		if ((texasr >> 56) != TM_CAUSE_RESCHED) {
-			putchar('.');
-			fflush(stdout);
 			continue;
 		}
 		if (dscr2 != dscr1) {
@@ -89,7 +88,12 @@ int test_body(void)
 	}
 }
 
-int main(void)
+static int tm_resched_dscr(void)
 {
-	return test_harness(test_body, "tm_resched_dscr");
+	return eat_cpu(test_body);
+}
+
+int main(int argc, const char *argv[])
+{
+	return test_harness(tm_resched_dscr, "tm_resched_dscr");
 }
diff --git a/tools/testing/selftests/pstore/.gitignore b/tools/testing/selftests/pstore/.gitignore
new file mode 100644
index 000000000000..5a4a26e5464b
--- /dev/null
+++ b/tools/testing/selftests/pstore/.gitignore
@@ -0,0 +1,2 @@
+logs
+*uuid
diff --git a/tools/testing/selftests/ptp/Makefile b/tools/testing/selftests/ptp/Makefile
index 83dd42b2129e..d4064c742c26 100644
--- a/tools/testing/selftests/ptp/Makefile
+++ b/tools/testing/selftests/ptp/Makefile
@@ -1,3 +1,4 @@
+CFLAGS += -I../../../../usr/include/
 TEST_PROGS := testptp
 LDLIBS += -lrt
 all: $(TEST_PROGS)
diff --git a/tools/testing/selftests/seccomp/Makefile b/tools/testing/selftests/seccomp/Makefile
index aeb0c805f3ca..553d870b4ca9 100644
--- a/tools/testing/selftests/seccomp/Makefile
+++ b/tools/testing/selftests/seccomp/Makefile
@@ -1,8 +1,16 @@
-TEST_GEN_PROGS := seccomp_bpf
-CFLAGS += -Wl,-no-as-needed -Wall
-LDFLAGS += -lpthread
+all:
 
 include ../lib.mk
 
-$(TEST_GEN_PROGS): seccomp_bpf.c ../kselftest_harness.h
-	$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+.PHONY: all clean
+
+BINARIES := seccomp_bpf seccomp_benchmark
+CFLAGS += -Wl,-no-as-needed -Wall
+
+seccomp_bpf: seccomp_bpf.c ../kselftest_harness.h
+	$(CC) $(CFLAGS) $(LDFLAGS) -lpthread $< -o $@
+
+TEST_PROGS += $(BINARIES)
+EXTRA_CLEAN := $(BINARIES)
+
+all: $(BINARIES)
diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c
new file mode 100644
index 000000000000..5838c8697ec3
--- /dev/null
+++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c
@@ -0,0 +1,99 @@
+/*
+ * Strictly speaking, this is not a test. But it can report during test
+ * runs so relative performace can be measured.
+ */
+#define _GNU_SOURCE
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <unistd.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+
+#define ARRAY_SIZE(a)    (sizeof(a) / sizeof(a[0]))
+
+unsigned long long timing(clockid_t clk_id, unsigned long long samples)
+{
+	pid_t pid, ret;
+	unsigned long long i;
+	struct timespec start, finish;
+
+	pid = getpid();
+	assert(clock_gettime(clk_id, &start) == 0);
+	for (i = 0; i < samples; i++) {
+		ret = syscall(__NR_getpid);
+		assert(pid == ret);
+	}
+	assert(clock_gettime(clk_id, &finish) == 0);
+
+	i = finish.tv_sec - start.tv_sec;
+	i *= 1000000000;
+	i += finish.tv_nsec - start.tv_nsec;
+
+	printf("%lu.%09lu - %lu.%09lu = %llu\n",
+		finish.tv_sec, finish.tv_nsec,
+		start.tv_sec, start.tv_nsec,
+		i);
+
+	return i;
+}
+
+unsigned long long calibrate(void)
+{
+	unsigned long long i;
+
+	printf("Calibrating reasonable sample size...\n");
+
+	for (i = 5; ; i++) {
+		unsigned long long samples = 1 << i;
+
+		/* Find something that takes more than 5 seconds to run. */
+		if (timing(CLOCK_REALTIME, samples) / 1000000000ULL > 5)
+			return samples;
+	}
+}
+
+int main(int argc, char *argv[])
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+	unsigned long long samples;
+	unsigned long long native, filtered;
+
+	if (argc > 1)
+		samples = strtoull(argv[1], NULL, 0);
+	else
+		samples = calibrate();
+
+	printf("Benchmarking %llu samples...\n", samples);
+
+	native = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
+	printf("getpid native: %llu ns\n", native);
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	assert(ret == 0);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	assert(ret == 0);
+
+	filtered = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
+	printf("getpid RET_ALLOW: %llu ns\n", filtered);
+
+	printf("Estimated seccomp overhead per syscall: %llu ns\n",
+		filtered - native);
+
+	if (filtered == native)
+		printf("Trying running again with more samples.\n");
+
+	return 0;
+}
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 73f5ea6778ce..24dbf634e2dd 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -6,10 +6,18 @@
  */
 
 #include <sys/types.h>
-#include <asm/siginfo.h>
-#define __have_siginfo_t 1
-#define __have_sigval_t 1
-#define __have_sigevent_t 1
+
+/*
+ * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
+ * we need to use the kernel's siginfo.h file and trick glibc
+ * into accepting it.
+ */
+#if !__GLIBC_PREREQ(2, 26)
+# include <asm/siginfo.h>
+# define __have_siginfo_t 1
+# define __have_sigval_t 1
+# define __have_sigevent_t 1
+#endif
 
 #include <errno.h>
 #include <linux/filter.h>
@@ -68,17 +76,7 @@
 #define SECCOMP_MODE_FILTER 2
 #endif
 
-#ifndef SECCOMP_RET_KILL
-#define SECCOMP_RET_KILL        0x00000000U /* kill the task immediately */
-#define SECCOMP_RET_TRAP        0x00030000U /* disallow and force a SIGSYS */
-#define SECCOMP_RET_ERRNO       0x00050000U /* returns an errno */
-#define SECCOMP_RET_TRACE       0x7ff00000U /* pass to a tracer or disallow */
-#define SECCOMP_RET_ALLOW       0x7fff0000U /* allow */
-
-/* Masks for the return value sections. */
-#define SECCOMP_RET_ACTION      0x7fff0000U
-#define SECCOMP_RET_DATA        0x0000ffffU
-
+#ifndef SECCOMP_RET_ALLOW
 struct seccomp_data {
 	int nr;
 	__u32 arch;
@@ -87,6 +85,70 @@ struct seccomp_data {
 };
 #endif
 
+#ifndef SECCOMP_RET_KILL_PROCESS
+#define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
+#define SECCOMP_RET_KILL_THREAD	 0x00000000U /* kill the thread */
+#endif
+#ifndef SECCOMP_RET_KILL
+#define SECCOMP_RET_KILL	 SECCOMP_RET_KILL_THREAD
+#define SECCOMP_RET_TRAP	 0x00030000U /* disallow and force a SIGSYS */
+#define SECCOMP_RET_ERRNO	 0x00050000U /* returns an errno */
+#define SECCOMP_RET_TRACE	 0x7ff00000U /* pass to a tracer or disallow */
+#define SECCOMP_RET_ALLOW	 0x7fff0000U /* allow */
+#endif
+#ifndef SECCOMP_RET_LOG
+#define SECCOMP_RET_LOG		 0x7ffc0000U /* allow after logging */
+#endif
+
+#ifndef __NR_seccomp
+# if defined(__i386__)
+#  define __NR_seccomp 354
+# elif defined(__x86_64__)
+#  define __NR_seccomp 317
+# elif defined(__arm__)
+#  define __NR_seccomp 383
+# elif defined(__aarch64__)
+#  define __NR_seccomp 277
+# elif defined(__hppa__)
+#  define __NR_seccomp 338
+# elif defined(__powerpc__)
+#  define __NR_seccomp 358
+# elif defined(__s390__)
+#  define __NR_seccomp 348
+# else
+#  warning "seccomp syscall number unknown for this architecture"
+#  define __NR_seccomp 0xffff
+# endif
+#endif
+
+#ifndef SECCOMP_SET_MODE_STRICT
+#define SECCOMP_SET_MODE_STRICT 0
+#endif
+
+#ifndef SECCOMP_SET_MODE_FILTER
+#define SECCOMP_SET_MODE_FILTER 1
+#endif
+
+#ifndef SECCOMP_GET_ACTION_AVAIL
+#define SECCOMP_GET_ACTION_AVAIL 2
+#endif
+
+#ifndef SECCOMP_FILTER_FLAG_TSYNC
+#define SECCOMP_FILTER_FLAG_TSYNC 1
+#endif
+
+#ifndef SECCOMP_FILTER_FLAG_LOG
+#define SECCOMP_FILTER_FLAG_LOG 2
+#endif
+
+#ifndef seccomp
+int seccomp(unsigned int op, unsigned int flags, void *args)
+{
+	errno = 0;
+	return syscall(__NR_seccomp, op, flags, args);
+}
+#endif
+
 #if __BYTE_ORDER == __LITTLE_ENDIAN
 #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
 #elif __BYTE_ORDER == __BIG_ENDIAN
@@ -107,7 +169,7 @@ TEST(mode_strict_support)
 	ASSERT_EQ(0, ret) {
 		TH_LOG("Kernel does not support CONFIG_SECCOMP");
 	}
-	syscall(__NR_exit, 1);
+	syscall(__NR_exit, 0);
 }
 
 TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
@@ -136,7 +198,7 @@ TEST(no_new_privs_support)
 	}
 }
 
-/* Tests kernel support by checking for a copy_from_user() fault on * NULL. */
+/* Tests kernel support by checking for a copy_from_user() fault on NULL. */
 TEST(mode_filter_support)
 {
 	long ret;
@@ -342,6 +404,28 @@ TEST(empty_prog)
 	EXPECT_EQ(EINVAL, errno);
 }
 
+TEST(log_all)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+	pid_t parent = getppid();
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	ASSERT_EQ(0, ret);
+
+	/* getppid() should succeed and be logged (no check for logging) */
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+}
+
 TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
 {
 	struct sock_filter filter[] = {
@@ -520,6 +604,117 @@ TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
 	close(fd);
 }
 
+/* This is a thread task to die via seccomp filter violation. */
+void *kill_thread(void *data)
+{
+	bool die = (bool)data;
+
+	if (die) {
+		prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
+		return (void *)SIBLING_EXIT_FAILURE;
+	}
+
+	return (void *)SIBLING_EXIT_UNKILLED;
+}
+
+/* Prepare a thread that will kill itself or both of us. */
+void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
+{
+	pthread_t thread;
+	void *status;
+	/* Kill only when calling __NR_prctl. */
+	struct sock_filter filter_thread[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog_thread = {
+		.len = (unsigned short)ARRAY_SIZE(filter_thread),
+		.filter = filter_thread,
+	};
+	struct sock_filter filter_process[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog_process = {
+		.len = (unsigned short)ARRAY_SIZE(filter_process),
+		.filter = filter_process,
+	};
+
+	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+	}
+
+	ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
+			     kill_process ? &prog_process : &prog_thread));
+
+	/*
+	 * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
+	 * flag cannot be downgraded by a new filter.
+	 */
+	ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
+
+	/* Start a thread that will exit immediately. */
+	ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
+	ASSERT_EQ(0, pthread_join(thread, &status));
+	ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status);
+
+	/* Start a thread that will die immediately. */
+	ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true));
+	ASSERT_EQ(0, pthread_join(thread, &status));
+	ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status);
+
+	/*
+	 * If we get here, only the spawned thread died. Let the parent know
+	 * the whole process didn't die (i.e. this thread, the spawner,
+	 * stayed running).
+	 */
+	exit(42);
+}
+
+TEST(KILL_thread)
+{
+	int status;
+	pid_t child_pid;
+
+	child_pid = fork();
+	ASSERT_LE(0, child_pid);
+	if (child_pid == 0) {
+		kill_thread_or_group(_metadata, false);
+		_exit(38);
+	}
+
+	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+
+	/* If only the thread was killed, we'll see exit 42. */
+	ASSERT_TRUE(WIFEXITED(status));
+	ASSERT_EQ(42, WEXITSTATUS(status));
+}
+
+TEST(KILL_process)
+{
+	int status;
+	pid_t child_pid;
+
+	child_pid = fork();
+	ASSERT_LE(0, child_pid);
+	if (child_pid == 0) {
+		kill_thread_or_group(_metadata, true);
+		_exit(38);
+	}
+
+	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+
+	/* If the entire process was killed, we'll see SIGSYS. */
+	ASSERT_TRUE(WIFSIGNALED(status));
+	ASSERT_EQ(SIGSYS, WTERMSIG(status));
+}
+
 /* TODO(wad) add 64-bit versus 32-bit arg tests. */
 TEST(arg_out_of_range)
 {
@@ -541,26 +736,30 @@ TEST(arg_out_of_range)
 	EXPECT_EQ(EINVAL, errno);
 }
 
+#define ERRNO_FILTER(name, errno)					\
+	struct sock_filter _read_filter_##name[] = {			\
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,				\
+			offsetof(struct seccomp_data, nr)),		\
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),	\
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | errno),	\
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),		\
+	};								\
+	struct sock_fprog prog_##name = {				\
+		.len = (unsigned short)ARRAY_SIZE(_read_filter_##name),	\
+		.filter = _read_filter_##name,				\
+	}
+
+/* Make sure basic errno values are correctly passed through a filter. */
 TEST(ERRNO_valid)
 {
-	struct sock_filter filter[] = {
-		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
-			offsetof(struct seccomp_data, nr)),
-		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
-		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | E2BIG),
-		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
-	};
-	struct sock_fprog prog = {
-		.len = (unsigned short)ARRAY_SIZE(filter),
-		.filter = filter,
-	};
+	ERRNO_FILTER(valid, E2BIG);
 	long ret;
 	pid_t parent = getppid();
 
 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 	ASSERT_EQ(0, ret);
 
-	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid);
 	ASSERT_EQ(0, ret);
 
 	EXPECT_EQ(parent, syscall(__NR_getppid));
@@ -568,26 +767,17 @@ TEST(ERRNO_valid)
 	EXPECT_EQ(E2BIG, errno);
 }
 
+/* Make sure an errno of zero is correctly handled by the arch code. */
 TEST(ERRNO_zero)
 {
-	struct sock_filter filter[] = {
-		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
-			offsetof(struct seccomp_data, nr)),
-		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
-		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 0),
-		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
-	};
-	struct sock_fprog prog = {
-		.len = (unsigned short)ARRAY_SIZE(filter),
-		.filter = filter,
-	};
+	ERRNO_FILTER(zero, 0);
 	long ret;
 	pid_t parent = getppid();
 
 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 	ASSERT_EQ(0, ret);
 
-	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero);
 	ASSERT_EQ(0, ret);
 
 	EXPECT_EQ(parent, syscall(__NR_getppid));
@@ -595,26 +785,21 @@ TEST(ERRNO_zero)
 	EXPECT_EQ(0, read(0, NULL, 0));
 }
 
+/*
+ * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller.
+ * This tests that the errno value gets capped correctly, fixed by
+ * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO").
+ */
 TEST(ERRNO_capped)
 {
-	struct sock_filter filter[] = {
-		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
-			offsetof(struct seccomp_data, nr)),
-		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
-		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 4096),
-		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
-	};
-	struct sock_fprog prog = {
-		.len = (unsigned short)ARRAY_SIZE(filter),
-		.filter = filter,
-	};
+	ERRNO_FILTER(capped, 4096);
 	long ret;
 	pid_t parent = getppid();
 
 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 	ASSERT_EQ(0, ret);
 
-	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped);
 	ASSERT_EQ(0, ret);
 
 	EXPECT_EQ(parent, syscall(__NR_getppid));
@@ -622,6 +807,37 @@ TEST(ERRNO_capped)
 	EXPECT_EQ(4095, errno);
 }
 
+/*
+ * Filters are processed in reverse order: last applied is executed first.
+ * Since only the SECCOMP_RET_ACTION mask is tested for return values, the
+ * SECCOMP_RET_DATA mask results will follow the most recently applied
+ * matching filter return (and not the lowest or highest value).
+ */
+TEST(ERRNO_order)
+{
+	ERRNO_FILTER(first,  11);
+	ERRNO_FILTER(second, 13);
+	ERRNO_FILTER(third,  12);
+	long ret;
+	pid_t parent = getppid();
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third);
+	ASSERT_EQ(0, ret);
+
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	EXPECT_EQ(-1, read(0, NULL, 0));
+	EXPECT_EQ(12, errno);
+}
+
 FIXTURE_DATA(TRAP) {
 	struct sock_fprog prog;
 };
@@ -676,7 +892,7 @@ TEST_F_SIGNAL(TRAP, ign, SIGSYS)
 	syscall(__NR_getpid);
 }
 
-static struct siginfo TRAP_info;
+static siginfo_t TRAP_info;
 static volatile int TRAP_nr;
 static void TRAP_action(int nr, siginfo_t *info, void *void_context)
 {
@@ -735,6 +951,7 @@ TEST_F(TRAP, handler)
 
 FIXTURE_DATA(precedence) {
 	struct sock_fprog allow;
+	struct sock_fprog log;
 	struct sock_fprog trace;
 	struct sock_fprog error;
 	struct sock_fprog trap;
@@ -746,6 +963,13 @@ FIXTURE_SETUP(precedence)
 	struct sock_filter allow_insns[] = {
 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 	};
+	struct sock_filter log_insns[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
+	};
 	struct sock_filter trace_insns[] = {
 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 			offsetof(struct seccomp_data, nr)),
@@ -782,6 +1006,7 @@ FIXTURE_SETUP(precedence)
 	memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
 	self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
 	FILTER_ALLOC(allow);
+	FILTER_ALLOC(log);
 	FILTER_ALLOC(trace);
 	FILTER_ALLOC(error);
 	FILTER_ALLOC(trap);
@@ -792,6 +1017,7 @@ FIXTURE_TEARDOWN(precedence)
 {
 #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
 	FILTER_FREE(allow);
+	FILTER_FREE(log);
 	FILTER_FREE(trace);
 	FILTER_FREE(error);
 	FILTER_FREE(trap);
@@ -809,6 +1035,8 @@ TEST_F(precedence, allow_ok)
 
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
@@ -833,6 +1061,8 @@ TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
 
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
@@ -864,6 +1094,8 @@ TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
@@ -885,6 +1117,8 @@ TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
 
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
@@ -910,6 +1144,8 @@ TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
@@ -931,6 +1167,8 @@ TEST_F(precedence, errno_is_third)
 
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
@@ -949,6 +1187,8 @@ TEST_F(precedence, errno_is_third_in_any_order)
 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 	ASSERT_EQ(0, ret);
 
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
@@ -971,6 +1211,8 @@ TEST_F(precedence, trace_is_fourth)
 
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 	ASSERT_EQ(0, ret);
 	/* Should work just fine. */
@@ -992,12 +1234,54 @@ TEST_F(precedence, trace_is_fourth_in_any_order)
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	/* Should work just fine. */
 	EXPECT_EQ(parent, syscall(__NR_getppid));
 	/* No ptracer */
 	EXPECT_EQ(-1, syscall(__NR_getpid));
 }
 
+TEST_F(precedence, log_is_fifth)
+{
+	pid_t mypid, parent;
+	long ret;
+
+	mypid = getpid();
+	parent = getppid();
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
+	/* Should work just fine. */
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	/* Should also work just fine */
+	EXPECT_EQ(mypid, syscall(__NR_getpid));
+}
+
+TEST_F(precedence, log_is_fifth_in_any_order)
+{
+	pid_t mypid, parent;
+	long ret;
+
+	mypid = getpid();
+	parent = getppid();
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+	ASSERT_EQ(0, ret);
+	/* Should work just fine. */
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	/* Should also work just fine */
+	EXPECT_EQ(mypid, syscall(__NR_getpid));
+}
+
 #ifndef PTRACE_O_TRACESECCOMP
 #define PTRACE_O_TRACESECCOMP	0x00000080
 #endif
@@ -1262,6 +1546,13 @@ TEST_F(TRACE_poke, getpid_runs_normally)
 # error "Do not know how to find your architecture's registers and syscalls"
 #endif
 
+/* When the syscall return can't be changed, stub out the tests for it. */
+#ifdef SYSCALL_NUM_RET_SHARE_REG
+# define EXPECT_SYSCALL_RETURN(val, action)	EXPECT_EQ(-1, action)
+#else
+# define EXPECT_SYSCALL_RETURN(val, action)	EXPECT_EQ(val, action)
+#endif
+
 /* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
  * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
  */
@@ -1357,7 +1648,7 @@ void change_syscall(struct __test_metadata *_metadata,
 #ifdef SYSCALL_NUM_RET_SHARE_REG
 		TH_LOG("Can't modify syscall return on this architecture");
 #else
-		regs.SYSCALL_RET = 1;
+		regs.SYSCALL_RET = EPERM;
 #endif
 
 #ifdef HAVE_GETREGS
@@ -1426,6 +1717,8 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
 
 	if (nr == __NR_getpid)
 		change_syscall(_metadata, tracee, __NR_getppid);
+	if (nr == __NR_open)
+		change_syscall(_metadata, tracee, -1);
 }
 
 FIXTURE_DATA(TRACE_syscall) {
@@ -1480,6 +1773,28 @@ FIXTURE_TEARDOWN(TRACE_syscall)
 		free(self->prog.filter);
 }
 
+TEST_F(TRACE_syscall, ptrace_syscall_redirected)
+{
+	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
+	teardown_trace_fixture(_metadata, self->tracer);
+	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
+					   true);
+
+	/* Tracer will redirect getpid to getppid. */
+	EXPECT_NE(self->mypid, syscall(__NR_getpid));
+}
+
+TEST_F(TRACE_syscall, ptrace_syscall_dropped)
+{
+	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
+	teardown_trace_fixture(_metadata, self->tracer);
+	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
+					   true);
+
+	/* Tracer should skip the open syscall, resulting in EPERM. */
+	EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_open));
+}
+
 TEST_F(TRACE_syscall, syscall_allowed)
 {
 	long ret;
@@ -1520,13 +1835,8 @@ TEST_F(TRACE_syscall, syscall_dropped)
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
 	ASSERT_EQ(0, ret);
 
-#ifdef SYSCALL_NUM_RET_SHARE_REG
-	/* gettid has been skipped */
-	EXPECT_EQ(-1, syscall(__NR_gettid));
-#else
 	/* gettid has been skipped and an altered return value stored. */
-	EXPECT_EQ(1, syscall(__NR_gettid));
-#endif
+	EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_gettid));
 	EXPECT_NE(self->mytid, syscall(__NR_gettid));
 }
 
@@ -1557,6 +1867,7 @@ TEST_F(TRACE_syscall, skip_after_RET_TRACE)
 	ASSERT_EQ(0, ret);
 
 	/* Tracer will redirect getpid to getppid, and we should see EPERM. */
+	errno = 0;
 	EXPECT_EQ(-1, syscall(__NR_getpid));
 	EXPECT_EQ(EPERM, errno);
 }
@@ -1654,47 +1965,6 @@ TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
 	EXPECT_NE(self->mypid, syscall(__NR_getpid));
 }
 
-#ifndef __NR_seccomp
-# if defined(__i386__)
-#  define __NR_seccomp 354
-# elif defined(__x86_64__)
-#  define __NR_seccomp 317
-# elif defined(__arm__)
-#  define __NR_seccomp 383
-# elif defined(__aarch64__)
-#  define __NR_seccomp 277
-# elif defined(__hppa__)
-#  define __NR_seccomp 338
-# elif defined(__powerpc__)
-#  define __NR_seccomp 358
-# elif defined(__s390__)
-#  define __NR_seccomp 348
-# else
-#  warning "seccomp syscall number unknown for this architecture"
-#  define __NR_seccomp 0xffff
-# endif
-#endif
-
-#ifndef SECCOMP_SET_MODE_STRICT
-#define SECCOMP_SET_MODE_STRICT 0
-#endif
-
-#ifndef SECCOMP_SET_MODE_FILTER
-#define SECCOMP_SET_MODE_FILTER 1
-#endif
-
-#ifndef SECCOMP_FILTER_FLAG_TSYNC
-#define SECCOMP_FILTER_FLAG_TSYNC 1
-#endif
-
-#ifndef seccomp
-int seccomp(unsigned int op, unsigned int flags, void *args)
-{
-	errno = 0;
-	return syscall(__NR_seccomp, op, flags, args);
-}
-#endif
-
 TEST(seccomp_syscall)
 {
 	struct sock_filter filter[] = {
@@ -1783,6 +2053,67 @@ TEST(seccomp_syscall_mode_lock)
 	}
 }
 
+/*
+ * Test detection of known and unknown filter flags. Userspace needs to be able
+ * to check if a filter flag is supported by the current kernel and a good way
+ * of doing that is by attempting to enter filter mode, with the flag bit in
+ * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates
+ * that the flag is valid and EINVAL indicates that the flag is invalid.
+ */
+TEST(detect_seccomp_filter_flags)
+{
+	unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
+				 SECCOMP_FILTER_FLAG_LOG };
+	unsigned int flag, all_flags;
+	int i;
+	long ret;
+
+	/* Test detection of known-good filter flags */
+	for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
+		flag = flags[i];
+		ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
+		ASSERT_NE(ENOSYS, errno) {
+			TH_LOG("Kernel does not support seccomp syscall!");
+		}
+		EXPECT_EQ(-1, ret);
+		EXPECT_EQ(EFAULT, errno) {
+			TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!",
+			       flag);
+		}
+
+		all_flags |= flag;
+	}
+
+	/* Test detection of all known-good filter flags */
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, all_flags, NULL);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EFAULT, errno) {
+		TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!",
+		       all_flags);
+	}
+
+	/* Test detection of an unknown filter flag */
+	flag = -1;
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EINVAL, errno) {
+		TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!",
+		       flag);
+	}
+
+	/*
+	 * Test detection of an unknown filter flag that may simply need to be
+	 * added to this test
+	 */
+	flag = flags[ARRAY_SIZE(flags) - 1] << 1;
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EINVAL, errno) {
+		TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?",
+		       flag);
+	}
+}
+
 TEST(TSYNC_first)
 {
 	struct sock_filter filter[] = {
@@ -2421,6 +2752,99 @@ TEST(syscall_restart)
 		_metadata->passed = 0;
 }
 
+TEST_SIGNAL(filter_flag_log, SIGSYS)
+{
+	struct sock_filter allow_filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_filter kill_filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog allow_prog = {
+		.len = (unsigned short)ARRAY_SIZE(allow_filter),
+		.filter = allow_filter,
+	};
+	struct sock_fprog kill_prog = {
+		.len = (unsigned short)ARRAY_SIZE(kill_filter),
+		.filter = kill_filter,
+	};
+	long ret;
+	pid_t parent = getppid();
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	/* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */
+	ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG,
+		      &allow_prog);
+	ASSERT_NE(ENOSYS, errno) {
+		TH_LOG("Kernel does not support seccomp syscall!");
+	}
+	EXPECT_NE(0, ret) {
+		TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!");
+	}
+	EXPECT_EQ(EINVAL, errno) {
+		TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!");
+	}
+
+	/* Verify that a simple, permissive filter can be added with no flags */
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
+	EXPECT_EQ(0, ret);
+
+	/* See if the same filter can be added with the FILTER_FLAG_LOG flag */
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
+		      &allow_prog);
+	ASSERT_NE(EINVAL, errno) {
+		TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!");
+	}
+	EXPECT_EQ(0, ret);
+
+	/* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
+		      &kill_prog);
+	EXPECT_EQ(0, ret);
+
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	/* getpid() should never return. */
+	EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST(get_action_avail)
+{
+	__u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP,
+			    SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE,
+			    SECCOMP_RET_LOG,   SECCOMP_RET_ALLOW };
+	__u32 unknown_action = 0x10000000U;
+	int i;
+	long ret;
+
+	ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]);
+	ASSERT_NE(ENOSYS, errno) {
+		TH_LOG("Kernel does not support seccomp syscall!");
+	}
+	ASSERT_NE(EINVAL, errno) {
+		TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!");
+	}
+	EXPECT_EQ(ret, 0);
+
+	for (i = 0; i < ARRAY_SIZE(actions); i++) {
+		ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]);
+		EXPECT_EQ(ret, 0) {
+			TH_LOG("Expected action (0x%X) not available!",
+			       actions[i]);
+		}
+	}
+
+	/* Check that an unknown action is handled properly (EOPNOTSUPP) */
+	ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action);
+	EXPECT_EQ(ret, -1);
+	EXPECT_EQ(errno, EOPNOTSUPP);
+}
+
 /*
  * TODO:
  * - add microbenchmarks
@@ -2429,6 +2853,8 @@ TEST(syscall_restart)
  * - endianness checking when appropriate
  * - 64-bit arg prodding
  * - arch value testing (x86 modes especially)
+ * - verify that FILTER_FLAG_LOG filters generate log messages
+ * - verify that RET_LOG generates log messages
  * - ...
  */
 
diff --git a/tools/testing/selftests/sigaltstack/sas.c b/tools/testing/selftests/sigaltstack/sas.c
index ccd07343d418..97bb150837df 100644
--- a/tools/testing/selftests/sigaltstack/sas.c
+++ b/tools/testing/selftests/sigaltstack/sas.c
@@ -17,6 +17,8 @@
 #include <assert.h>
 #include <errno.h>
 
+#include "../kselftest.h"
+
 #ifndef SS_AUTODISARM
 #define SS_AUTODISARM  (1U << 31)
 #endif
@@ -37,12 +39,15 @@ void my_usr1(int sig, siginfo_t *si, void *u)
 	stack_t stk;
 	struct stk_data *p;
 
+#if __s390x__
+	register unsigned long sp asm("%15");
+#else
 	register unsigned long sp asm("sp");
+#endif
 
 	if (sp < (unsigned long)sstack ||
 			sp >= (unsigned long)sstack + SIGSTKSZ) {
-		printf("[FAIL]\tSP is not on sigaltstack\n");
-		exit(EXIT_FAILURE);
+		ksft_exit_fail_msg("SP is not on sigaltstack\n");
 	}
 	/* put some data on stack. other sighandler will try to overwrite it */
 	aa = alloca(1024);
@@ -50,21 +55,22 @@ void my_usr1(int sig, siginfo_t *si, void *u)
 	p = (struct stk_data *)(aa + 512);
 	strcpy(p->msg, msg);
 	p->flag = 1;
-	printf("[RUN]\tsignal USR1\n");
+	ksft_print_msg("[RUN]\tsignal USR1\n");
 	err = sigaltstack(NULL, &stk);
 	if (err) {
-		perror("[FAIL]\tsigaltstack()");
+		ksft_exit_fail_msg("sigaltstack() - %s\n", strerror(errno));
 		exit(EXIT_FAILURE);
 	}
 	if (stk.ss_flags != SS_DISABLE)
-		printf("[FAIL]\tss_flags=%x, should be SS_DISABLE\n",
+		ksft_test_result_fail("tss_flags=%x, should be SS_DISABLE\n",
 				stk.ss_flags);
 	else
-		printf("[OK]\tsigaltstack is disabled in sighandler\n");
+		ksft_test_result_pass(
+				"sigaltstack is disabled in sighandler\n");
 	swapcontext(&sc, &uc);
-	printf("%s\n", p->msg);
+	ksft_print_msg("%s\n", p->msg);
 	if (!p->flag) {
-		printf("[RUN]\tAborting\n");
+		ksft_exit_skip("[RUN]\tAborting\n");
 		exit(EXIT_FAILURE);
 	}
 }
@@ -74,13 +80,13 @@ void my_usr2(int sig, siginfo_t *si, void *u)
 	char *aa;
 	struct stk_data *p;
 
-	printf("[RUN]\tsignal USR2\n");
+	ksft_print_msg("[RUN]\tsignal USR2\n");
 	aa = alloca(1024);
 	/* dont run valgrind on this */
 	/* try to find the data stored by previous sighandler */
 	p = memmem(aa, 1024, msg, strlen(msg));
 	if (p) {
-		printf("[FAIL]\tsigaltstack re-used\n");
+		ksft_test_result_fail("sigaltstack re-used\n");
 		/* corrupt the data */
 		strcpy(p->msg, msg2);
 		/* tell other sighandler that his data is corrupted */
@@ -90,7 +96,7 @@ void my_usr2(int sig, siginfo_t *si, void *u)
 
 static void switch_fn(void)
 {
-	printf("[RUN]\tswitched to user ctx\n");
+	ksft_print_msg("[RUN]\tswitched to user ctx\n");
 	raise(SIGUSR2);
 	setcontext(&sc);
 }
@@ -101,6 +107,8 @@ int main(void)
 	stack_t stk;
 	int err;
 
+	ksft_print_header();
+
 	sigemptyset(&act.sa_mask);
 	act.sa_flags = SA_ONSTACK | SA_SIGINFO;
 	act.sa_sigaction = my_usr1;
@@ -110,19 +118,20 @@ int main(void)
 	sstack = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE,
 		      MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
 	if (sstack == MAP_FAILED) {
-		perror("mmap()");
+		ksft_exit_fail_msg("mmap() - %s\n", strerror(errno));
 		return EXIT_FAILURE;
 	}
 
 	err = sigaltstack(NULL, &stk);
 	if (err) {
-		perror("[FAIL]\tsigaltstack()");
+		ksft_exit_fail_msg("sigaltstack() - %s\n", strerror(errno));
 		exit(EXIT_FAILURE);
 	}
 	if (stk.ss_flags == SS_DISABLE) {
-		printf("[OK]\tInitial sigaltstack state was SS_DISABLE\n");
+		ksft_test_result_pass(
+				"Initial sigaltstack state was SS_DISABLE\n");
 	} else {
-		printf("[FAIL]\tInitial sigaltstack state was %x; "
+		ksft_exit_fail_msg("Initial sigaltstack state was %x; "
 		       "should have been SS_DISABLE\n", stk.ss_flags);
 		return EXIT_FAILURE;
 	}
@@ -133,7 +142,8 @@ int main(void)
 	err = sigaltstack(&stk, NULL);
 	if (err) {
 		if (errno == EINVAL) {
-			printf("[NOTE]\tThe running kernel doesn't support SS_AUTODISARM\n");
+			ksft_exit_skip(
+				"[NOTE]\tThe running kernel doesn't support SS_AUTODISARM\n");
 			/*
 			 * If test cases for the !SS_AUTODISARM variant were
 			 * added, we could still run them.  We don't have any
@@ -142,7 +152,9 @@ int main(void)
 			 */
 			return 0;
 		} else {
-			perror("[FAIL]\tsigaltstack(SS_ONSTACK | SS_AUTODISARM)");
+			ksft_exit_fail_msg(
+				"sigaltstack(SS_ONSTACK | SS_AUTODISARM)  %s\n",
+					strerror(errno));
 			return EXIT_FAILURE;
 		}
 	}
@@ -150,7 +162,7 @@ int main(void)
 	ustack = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE,
 		      MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
 	if (ustack == MAP_FAILED) {
-		perror("mmap()");
+		ksft_exit_fail_msg("mmap() - %s\n", strerror(errno));
 		return EXIT_FAILURE;
 	}
 	getcontext(&uc);
@@ -162,16 +174,17 @@ int main(void)
 
 	err = sigaltstack(NULL, &stk);
 	if (err) {
-		perror("[FAIL]\tsigaltstack()");
+		ksft_exit_fail_msg("sigaltstack() - %s\n", strerror(errno));
 		exit(EXIT_FAILURE);
 	}
 	if (stk.ss_flags != SS_AUTODISARM) {
-		printf("[FAIL]\tss_flags=%x, should be SS_AUTODISARM\n",
+		ksft_exit_fail_msg("ss_flags=%x, should be SS_AUTODISARM\n",
 				stk.ss_flags);
 		exit(EXIT_FAILURE);
 	}
-	printf("[OK]\tsigaltstack is still SS_AUTODISARM after signal\n");
+	ksft_test_result_pass(
+			"sigaltstack is still SS_AUTODISARM after signal\n");
 
-	printf("[OK]\tTest passed\n");
+	ksft_exit_pass();
 	return 0;
 }
diff --git a/tools/testing/selftests/splice/.gitignore b/tools/testing/selftests/splice/.gitignore
new file mode 100644
index 000000000000..1e23fefd68e8
--- /dev/null
+++ b/tools/testing/selftests/splice/.gitignore
@@ -0,0 +1 @@
+default_file_splice_read
diff --git a/tools/testing/selftests/splice/Makefile b/tools/testing/selftests/splice/Makefile
index 9fc78e5e5451..7e1187e007fa 100644
--- a/tools/testing/selftests/splice/Makefile
+++ b/tools/testing/selftests/splice/Makefile
@@ -1,7 +1,4 @@
 TEST_PROGS := default_file_splice_read.sh
-EXTRA := default_file_splice_read
-all: $(TEST_PROGS) $(EXTRA)
+TEST_GEN_PROGS_EXTENDED := default_file_splice_read
 
 include ../lib.mk
-
-EXTRA_CLEAN := $(EXTRA)
diff --git a/tools/testing/selftests/sync/Makefile b/tools/testing/selftests/sync/Makefile
index 4981c6b6d050..8e04d0afcbd7 100644
--- a/tools/testing/selftests/sync/Makefile
+++ b/tools/testing/selftests/sync/Makefile
@@ -2,12 +2,16 @@ CFLAGS += -O2 -g -std=gnu89 -pthread -Wall -Wextra
 CFLAGS += -I../../../../usr/include/
 LDFLAGS += -pthread
 
-TEST_PROGS = sync_test
-
-all: $(TEST_PROGS)
+.PHONY: all clean
 
 include ../lib.mk
 
+# lib.mk TEST_CUSTOM_PROGS var is for custom tests that need special
+# build rules. lib.mk will run and install them.
+
+TEST_CUSTOM_PROGS := $(OUTPUT)/sync_test
+all: $(TEST_CUSTOM_PROGS)
+
 OBJS = sync_test.o sync.o
 
 TESTS += sync_alloc.o
@@ -18,6 +22,16 @@ TESTS += sync_stress_parallelism.o
 TESTS += sync_stress_consumer.o
 TESTS += sync_stress_merge.o
 
-sync_test: $(OBJS) $(TESTS)
+OBJS := $(patsubst %,$(OUTPUT)/%,$(OBJS))
+TESTS := $(patsubst %,$(OUTPUT)/%,$(TESTS))
+
+$(TEST_CUSTOM_PROGS): $(TESTS) $(OBJS)
+	$(CC) -o $(TEST_CUSTOM_PROGS) $(OBJS) $(TESTS) $(CFLAGS) $(LDFLAGS)
+
+$(OBJS): $(OUTPUT)/%.o: %.c
+	$(CC) -c $^ -o $@
+
+$(TESTS): $(OUTPUT)/%.o: %.c
+	$(CC) -c $^ -o $@
 
-EXTRA_CLEAN := sync_test $(OBJS) $(TESTS)
+EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(OBJS) $(TESTS)
diff --git a/tools/testing/selftests/sync/sync_test.c b/tools/testing/selftests/sync/sync_test.c
index 62fa666e501a..7f7938263c5c 100644
--- a/tools/testing/selftests/sync/sync_test.c
+++ b/tools/testing/selftests/sync/sync_test.c
@@ -31,62 +31,83 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/wait.h>
+#include <errno.h>
+#include <string.h>
 
+#include "../kselftest.h"
 #include "synctest.h"
 
 static int run_test(int (*test)(void), char *name)
 {
 	int result;
 	pid_t childpid;
+	int ret;
 
 	fflush(stdout);
 	childpid = fork();
 
 	if (childpid) {
 		waitpid(childpid, &result, 0);
-		if (WIFEXITED(result))
-			return WEXITSTATUS(result);
+		if (WIFEXITED(result)) {
+			ret = WEXITSTATUS(result);
+			if (!ret)
+				ksft_test_result_pass("[RUN]\t%s\n", name);
+			else
+				ksft_test_result_fail("[RUN]\t%s\n", name);
+			return ret;
+		}
 		return 1;
 	}
 
-	printf("[RUN]\tExecuting %s\n", name);
 	exit(test());
 }
 
-static int sync_api_supported(void)
+static void sync_api_supported(void)
 {
 	struct stat sbuf;
+	int ret;
 
-	return 0 == stat("/sys/kernel/debug/sync/sw_sync", &sbuf);
+	ret = stat("/sys/kernel/debug/sync/sw_sync", &sbuf);
+	if (!ret)
+		return;
+
+	if (errno == ENOENT)
+		ksft_exit_skip("Sync framework not supported by kernel\n");
+
+	if (errno == EACCES)
+		ksft_exit_skip("Run Sync test as root.\n");
+
+	ksft_exit_fail_msg("stat failed on /sys/kernel/debug/sync/sw_sync: %s",
+				strerror(errno));
 }
 
 int main(void)
 {
-	int err = 0;
+	int err;
 
-	if (!sync_api_supported()) {
-		printf("SKIP: Sync framework not supported by kernel\n");
-		return 0;
-	}
+	ksft_print_header();
+
+	sync_api_supported();
 
-	printf("[RUN]\tTesting sync framework\n");
+	ksft_print_msg("[RUN]\tTesting sync framework\n");
 
-	err += RUN_TEST(test_alloc_timeline);
-	err += RUN_TEST(test_alloc_fence);
-	err += RUN_TEST(test_alloc_fence_negative);
+	RUN_TEST(test_alloc_timeline);
+	RUN_TEST(test_alloc_fence);
+	RUN_TEST(test_alloc_fence_negative);
 
-	err += RUN_TEST(test_fence_one_timeline_wait);
-	err += RUN_TEST(test_fence_one_timeline_merge);
-	err += RUN_TEST(test_fence_merge_same_fence);
-	err += RUN_TEST(test_fence_multi_timeline_wait);
-	err += RUN_TEST(test_stress_two_threads_shared_timeline);
-	err += RUN_TEST(test_consumer_stress_multi_producer_single_consumer);
-	err += RUN_TEST(test_merge_stress_random_merge);
+	RUN_TEST(test_fence_one_timeline_wait);
+	RUN_TEST(test_fence_one_timeline_merge);
+	RUN_TEST(test_fence_merge_same_fence);
+	RUN_TEST(test_fence_multi_timeline_wait);
+	RUN_TEST(test_stress_two_threads_shared_timeline);
+	RUN_TEST(test_consumer_stress_multi_producer_single_consumer);
+	RUN_TEST(test_merge_stress_random_merge);
 
+	err = ksft_get_fail_cnt();
 	if (err)
-		printf("[FAIL]\tsync errors: %d\n", err);
-	else
-		printf("[OK]\tsync\n");
+		ksft_exit_fail_msg("%d out of %d sync tests failed\n",
+					err, ksft_test_num());
 
-	return !!err;
+	/* need this return to keep gcc happy */
+	return ksft_exit_pass();
 }
diff --git a/tools/testing/selftests/sync/synctest.h b/tools/testing/selftests/sync/synctest.h
index e7d1d57dba7a..90a8e5369914 100644
--- a/tools/testing/selftests/sync/synctest.h
+++ b/tools/testing/selftests/sync/synctest.h
@@ -29,10 +29,11 @@
 #define SELFTESTS_SYNCTEST_H
 
 #include <stdio.h>
+#include "../kselftest.h"
 
 #define ASSERT(cond, msg) do { \
 	if (!(cond)) { \
-		printf("[ERROR]\t%s", (msg)); \
+		ksft_print_msg("[ERROR]\t%s", (msg)); \
 		return 1; \
 	} \
 } while (0)
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/tests.json b/tools/testing/selftests/tc-testing/tc-tests/actions/tests.json
index af519bc97a8e..6973bdc5b5bf 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/tests.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/tests.json
@@ -1111,5 +1111,55 @@
         "teardown": [
             "$TC actions flush action gact"
         ]
+    },
+    {
+        "id": "a568",
+        "name": "Add action with ife type",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action ife encode type 0xDEAD index 1"
+        ],
+        "cmdUnderTest": "$TC actions get action ife index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 1",
+        "matchPattern": "type 0xDEAD",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "b983",
+        "name": "Add action without ife type",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action ife encode index 1"
+        ],
+        "cmdUnderTest": "$TC actions get action ife index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 1",
+        "matchPattern": "type 0xED3E",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
     }
 ]
 \ No newline at end of file
diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile
index a9b86133b9b3..ae4593115408 100644
--- a/tools/testing/selftests/timers/Makefile
+++ b/tools/testing/selftests/timers/Makefile
@@ -1,5 +1,4 @@
-BUILD_FLAGS = -DKTEST
-CFLAGS += -O3 -Wl,-no-as-needed -Wall $(BUILD_FLAGS)
+CFLAGS += -O3 -Wl,-no-as-needed -Wall
 LDFLAGS += -lrt -lpthread -lm
 
 # these are all "safe" tests that don't modify
@@ -7,9 +6,11 @@ LDFLAGS += -lrt -lpthread -lm
 TEST_GEN_PROGS = posix_timers nanosleep nsleep-lat set-timer-lat mqueue-lat \
 	     inconsistency-check raw_skew threadtest rtctest
 
-TEST_GEN_PROGS_EXTENDED = alarmtimer-suspend valid-adjtimex adjtick change_skew \
+DESTRUCTIVE_TESTS = alarmtimer-suspend valid-adjtimex adjtick change_skew \
 		      skew_consistency clocksource-switch freq-step leap-a-day \
-		      leapcrash set-tai set-2038 set-tz rtctest_setdate
+		      leapcrash set-tai set-2038 set-tz
+
+TEST_GEN_PROGS_EXTENDED = $(DESTRUCTIVE_TESTS) rtctest_setdate
 
 
 include ../lib.mk
@@ -18,16 +19,4 @@ include ../lib.mk
 # and may modify the system time or trigger
 # other behavior like suspend
 run_destructive_tests: run_tests
-	./alarmtimer-suspend
-	./valid-adjtimex
-	./adjtick
-	./change_skew
-	./skew_consistency
-	./clocksource-switch
-	./freq-step
-	./leap-a-day -s -i 10
-	./leapcrash
-	./set-tz
-	./set-tai
-	./set-2038
-
+	$(call RUN_TESTS, $(DESTRUCTIVE_TESTS))
diff --git a/tools/testing/selftests/timers/adjtick.c b/tools/testing/selftests/timers/adjtick.c
index 9887fd538fec..0caca3a06bd2 100644
--- a/tools/testing/selftests/timers/adjtick.c
+++ b/tools/testing/selftests/timers/adjtick.c
@@ -23,18 +23,7 @@
 #include <sys/timex.h>
 #include <time.h>
 
-#ifdef KTEST
 #include "../kselftest.h"
-#else
-static inline int ksft_exit_pass(void)
-{
-	exit(0);
-}
-static inline int ksft_exit_fail(void)
-{
-	exit(1);
-}
-#endif
 
 #define CLOCK_MONOTONIC_RAW	4
 
diff --git a/tools/testing/selftests/timers/alarmtimer-suspend.c b/tools/testing/selftests/timers/alarmtimer-suspend.c
index 2b361b830395..4da09dbf83ba 100644
--- a/tools/testing/selftests/timers/alarmtimer-suspend.c
+++ b/tools/testing/selftests/timers/alarmtimer-suspend.c
@@ -28,18 +28,7 @@
 #include <signal.h>
 #include <stdlib.h>
 #include <pthread.h>
-#ifdef KTEST
 #include "../kselftest.h"
-#else
-static inline int ksft_exit_pass(void)
-{
-	exit(0);
-}
-static inline int ksft_exit_fail(void)
-{
-	exit(1);
-}
-#endif
 
 #define CLOCK_REALTIME			0
 #define CLOCK_MONOTONIC			1
diff --git a/tools/testing/selftests/timers/change_skew.c b/tools/testing/selftests/timers/change_skew.c
index cb1968977c04..c4eab7124990 100644
--- a/tools/testing/selftests/timers/change_skew.c
+++ b/tools/testing/selftests/timers/change_skew.c
@@ -28,18 +28,7 @@
 #include <sys/time.h>
 #include <sys/timex.h>
 #include <time.h>
-#ifdef KTEST
 #include "../kselftest.h"
-#else
-static inline int ksft_exit_pass(void)
-{
-	exit(0);
-}
-static inline int ksft_exit_fail(void)
-{
-	exit(1);
-}
-#endif
 
 #define NSEC_PER_SEC 1000000000LL
 
diff --git a/tools/testing/selftests/timers/clocksource-switch.c b/tools/testing/selftests/timers/clocksource-switch.c
index 5ff165373f8b..bfc974b4572d 100644
--- a/tools/testing/selftests/timers/clocksource-switch.c
+++ b/tools/testing/selftests/timers/clocksource-switch.c
@@ -34,18 +34,7 @@
 #include <fcntl.h>
 #include <string.h>
 #include <sys/wait.h>
-#ifdef KTEST
 #include "../kselftest.h"
-#else
-static inline int ksft_exit_pass(void)
-{
-	exit(0);
-}
-static inline int ksft_exit_fail(void)
-{
-	exit(1);
-}
-#endif
 
 
 int get_clocksources(char list[][30])
@@ -61,7 +50,7 @@ int get_clocksources(char list[][30])
 
 	close(fd);
 
-	for (i = 0; i < 30; i++)
+	for (i = 0; i < 10; i++)
 		list[i][0] = '\0';
 
 	head = buf;
diff --git a/tools/testing/selftests/timers/inconsistency-check.c b/tools/testing/selftests/timers/inconsistency-check.c
index 74c60e8759a0..022d3ffe3fbf 100644
--- a/tools/testing/selftests/timers/inconsistency-check.c
+++ b/tools/testing/selftests/timers/inconsistency-check.c
@@ -28,18 +28,7 @@
 #include <sys/timex.h>
 #include <string.h>
 #include <signal.h>
-#ifdef KTEST
 #include "../kselftest.h"
-#else
-static inline int ksft_exit_pass(void)
-{
-	exit(0);
-}
-static inline int ksft_exit_fail(void)
-{
-	exit(1);
-}
-#endif
 
 #define CALLS_PER_LOOP 64
 #define NSEC_PER_SEC 1000000000ULL
diff --git a/tools/testing/selftests/timers/leap-a-day.c b/tools/testing/selftests/timers/leap-a-day.c
index fb46ad6ac92c..19e46ed5dfb5 100644
--- a/tools/testing/selftests/timers/leap-a-day.c
+++ b/tools/testing/selftests/timers/leap-a-day.c
@@ -48,18 +48,7 @@
 #include <string.h>
 #include <signal.h>
 #include <unistd.h>
-#ifdef KTEST
 #include "../kselftest.h"
-#else
-static inline int ksft_exit_pass(void)
-{
-	exit(0);
-}
-static inline int ksft_exit_fail(void)
-{
-	exit(1);
-}
-#endif
 
 #define NSEC_PER_SEC 1000000000ULL
 #define CLOCK_TAI 11
@@ -190,18 +179,18 @@ int main(int argc, char **argv)
 	struct sigevent se;
 	struct sigaction act;
 	int signum = SIGRTMAX;
-	int settime = 0;
+	int settime = 1;
 	int tai_time = 0;
 	int insert = 1;
-	int iterations = -1;
+	int iterations = 10;
 	int opt;
 
 	/* Process arguments */
 	while ((opt = getopt(argc, argv, "sti:")) != -1) {
 		switch (opt) {
-		case 's':
-			printf("Setting time to speed up testing\n");
-			settime = 1;
+		case 'w':
+			printf("Only setting leap-flag, not changing time. It could take up to a day for leap to trigger.\n");
+			settime = 0;
 			break;
 		case 'i':
 			iterations = atoi(optarg);
@@ -210,9 +199,10 @@ int main(int argc, char **argv)
 			tai_time = 1;
 			break;
 		default:
-			printf("Usage: %s [-s] [-i <iterations>]\n", argv[0]);
-			printf("	-s: Set time to right before leap second each iteration\n");
-			printf("	-i: Number of iterations\n");
+			printf("Usage: %s [-w] [-i <iterations>]\n", argv[0]);
+			printf("	-w: Set flag and wait for leap second each iteration");
+			printf("	    (default sets time to right before leapsecond)\n");
+			printf("	-i: Number of iterations (-1 = infinite, default is 10)\n");
 			printf("	-t: Print TAI time\n");
 			exit(-1);
 		}
diff --git a/tools/testing/selftests/timers/leapcrash.c b/tools/testing/selftests/timers/leapcrash.c
index a1071bdbdeb7..830c462f605d 100644
--- a/tools/testing/selftests/timers/leapcrash.c
+++ b/tools/testing/selftests/timers/leapcrash.c
@@ -22,20 +22,7 @@
 #include <sys/timex.h>
 #include <string.h>
 #include <signal.h>
-#ifdef KTEST
 #include "../kselftest.h"
-#else
-static inline int ksft_exit_pass(void)
-{
-	exit(0);
-}
-static inline int ksft_exit_fail(void)
-{
-	exit(1);
-}
-#endif
-
-
 
 /* clear NTP time_status & time_state */
 int clear_time_state(void)
diff --git a/tools/testing/selftests/timers/mqueue-lat.c b/tools/testing/selftests/timers/mqueue-lat.c
index a2a3924d0b41..1867db5d6f5e 100644
--- a/tools/testing/selftests/timers/mqueue-lat.c
+++ b/tools/testing/selftests/timers/mqueue-lat.c
@@ -29,18 +29,7 @@
 #include <signal.h>
 #include <errno.h>
 #include <mqueue.h>
-#ifdef KTEST
 #include "../kselftest.h"
-#else
-static inline int ksft_exit_pass(void)
-{
-	exit(0);
-}
-static inline int ksft_exit_fail(void)
-{
-	exit(1);
-}
-#endif
 
 #define NSEC_PER_SEC 1000000000ULL
 
diff --git a/tools/testing/selftests/timers/nanosleep.c b/tools/testing/selftests/timers/nanosleep.c
index ff942ff7c9b3..8adb0bb51d4d 100644
--- a/tools/testing/selftests/timers/nanosleep.c
+++ b/tools/testing/selftests/timers/nanosleep.c
@@ -27,18 +27,7 @@
 #include <sys/timex.h>
 #include <string.h>
 #include <signal.h>
-#ifdef KTEST
 #include "../kselftest.h"
-#else
-static inline int ksft_exit_pass(void)
-{
-	exit(0);
-}
-static inline int ksft_exit_fail(void)
-{
-	exit(1);
-}
-#endif
 
 #define NSEC_PER_SEC 1000000000ULL
 
diff --git a/tools/testing/selftests/timers/nsleep-lat.c b/tools/testing/selftests/timers/nsleep-lat.c
index 2d7898fda0f1..c3c3dc10db17 100644
--- a/tools/testing/selftests/timers/nsleep-lat.c
+++ b/tools/testing/selftests/timers/nsleep-lat.c
@@ -24,18 +24,7 @@
 #include <sys/timex.h>
 #include <string.h>
 #include <signal.h>
-#ifdef KTEST
 #include "../kselftest.h"
-#else
-static inline int ksft_exit_pass(void)
-{
-	exit(0);
-}
-static inline int ksft_exit_fail(void)
-{
-	exit(1);
-}
-#endif
 
 #define NSEC_PER_SEC 1000000000ULL
 
diff --git a/tools/testing/selftests/timers/raw_skew.c b/tools/testing/selftests/timers/raw_skew.c
index 30906bfd9c1b..ca6cd146aafe 100644
--- a/tools/testing/selftests/timers/raw_skew.c
+++ b/tools/testing/selftests/timers/raw_skew.c
@@ -25,19 +25,7 @@
 #include <sys/time.h>
 #include <sys/timex.h>
 #include <time.h>
-#ifdef KTEST
 #include "../kselftest.h"
-#else
-static inline int ksft_exit_pass(void)
-{
-	exit(0);
-}
-static inline int ksft_exit_fail(void)
-{
-	exit(1);
-}
-#endif
-
 
 #define CLOCK_MONOTONIC_RAW		4
 #define NSEC_PER_SEC 1000000000LL
diff --git a/tools/testing/selftests/timers/rtctest.c b/tools/testing/selftests/timers/rtctest.c
index f61170f7b024..411eff625e66 100644
--- a/tools/testing/selftests/timers/rtctest.c
+++ b/tools/testing/selftests/timers/rtctest.c
@@ -221,6 +221,11 @@ test_READ:
 	/* Read the current alarm settings */
 	retval = ioctl(fd, RTC_ALM_READ, &rtc_tm);
 	if (retval == -1) {
+		if (errno == EINVAL) {
+			fprintf(stderr,
+					"\n...EINVAL reading current alarm setting.\n");
+			goto test_PIE;
+		}
 		perror("RTC_ALM_READ ioctl");
 		exit(errno);
 	}
@@ -231,7 +236,7 @@ test_READ:
 	/* Enable alarm interrupts */
 	retval = ioctl(fd, RTC_AIE_ON, 0);
 	if (retval == -1) {
-		if (errno == EINVAL) {
+		if (errno == EINVAL || errno == EIO) {
 			fprintf(stderr,
 				"\n...Alarm IRQs not supported.\n");
 			goto test_PIE;
diff --git a/tools/testing/selftests/timers/set-2038.c b/tools/testing/selftests/timers/set-2038.c
index c8a7e14446b1..688cfd81b531 100644
--- a/tools/testing/selftests/timers/set-2038.c
+++ b/tools/testing/selftests/timers/set-2038.c
@@ -27,18 +27,7 @@
 #include <unistd.h>
 #include <time.h>
 #include <sys/time.h>
-#ifdef KTEST
 #include "../kselftest.h"
-#else
-static inline int ksft_exit_pass(void)
-{
-	exit(0);
-}
-static inline int ksft_exit_fail(void)
-{
-	exit(1);
-}
-#endif
 
 #define NSEC_PER_SEC 1000000000LL
 
diff --git a/tools/testing/selftests/timers/set-tai.c b/tools/testing/selftests/timers/set-tai.c
index dc88dbc8831f..70fed27d8fd3 100644
--- a/tools/testing/selftests/timers/set-tai.c
+++ b/tools/testing/selftests/timers/set-tai.c
@@ -23,18 +23,7 @@
 #include <string.h>
 #include <signal.h>
 #include <unistd.h>
-#ifdef KTEST
 #include "../kselftest.h"
-#else
-static inline int ksft_exit_pass(void)
-{
-	exit(0);
-}
-static inline int ksft_exit_fail(void)
-{
-	exit(1);
-}
-#endif
 
 int set_tai(int offset)
 {
diff --git a/tools/testing/selftests/timers/set-timer-lat.c b/tools/testing/selftests/timers/set-timer-lat.c
index 15434da23b04..50da45437daa 100644
--- a/tools/testing/selftests/timers/set-timer-lat.c
+++ b/tools/testing/selftests/timers/set-timer-lat.c
@@ -28,18 +28,7 @@
 #include <signal.h>
 #include <stdlib.h>
 #include <pthread.h>
-#ifdef KTEST
 #include "../kselftest.h"
-#else
-static inline int ksft_exit_pass(void)
-{
-	exit(0);
-}
-static inline int ksft_exit_fail(void)
-{
-	exit(1);
-}
-#endif
 
 #define CLOCK_REALTIME			0
 #define CLOCK_MONOTONIC			1
@@ -154,7 +143,8 @@ int setup_timer(int clock_id, int flags, int interval, timer_t *tm1)
 			printf("%-22s %s missing CAP_WAKE_ALARM?    : [UNSUPPORTED]\n",
 					clockstring(clock_id),
 					flags ? "ABSTIME":"RELTIME");
-			return 0;
+			/* Indicate timer isn't set, so caller doesn't wait */
+			return 1;
 		}
 		printf("%s - timer_create() failed\n", clockstring(clock_id));
 		return -1;
@@ -224,8 +214,9 @@ int do_timer(int clock_id, int flags)
 	int err;
 
 	err = setup_timer(clock_id, flags, interval, &tm1);
+	/* Unsupported case - return 0 to not fail the test */
 	if (err)
-		return err;
+		return err == 1 ? 0 : err;
 
 	while (alarmcount < 5)
 		sleep(1);
@@ -239,18 +230,17 @@ int do_timer_oneshot(int clock_id, int flags)
 	timer_t tm1;
 	const int interval = 0;
 	struct timeval timeout;
-	fd_set fds;
 	int err;
 
 	err = setup_timer(clock_id, flags, interval, &tm1);
+	/* Unsupported case - return 0 to not fail the test */
 	if (err)
-		return err;
+		return err == 1 ? 0 : err;
 
 	memset(&timeout, 0, sizeof(timeout));
 	timeout.tv_sec = 5;
-	FD_ZERO(&fds);
 	do {
-		err = select(FD_SETSIZE, &fds, NULL, NULL, &timeout);
+		err = select(0, NULL, NULL, NULL, &timeout);
 	} while (err == -1 && errno == EINTR);
 
 	timer_delete(tm1);
diff --git a/tools/testing/selftests/timers/set-tz.c b/tools/testing/selftests/timers/set-tz.c
index f4184928b16b..877fd5532fee 100644
--- a/tools/testing/selftests/timers/set-tz.c
+++ b/tools/testing/selftests/timers/set-tz.c
@@ -23,18 +23,7 @@
 #include <string.h>
 #include <signal.h>
 #include <unistd.h>
-#ifdef KTEST
 #include "../kselftest.h"
-#else
-static inline int ksft_exit_pass(void)
-{
-	exit(0);
-}
-static inline int ksft_exit_fail(void)
-{
-	exit(1);
-}
-#endif
 
 int set_tz(int min, int dst)
 {
diff --git a/tools/testing/selftests/timers/skew_consistency.c b/tools/testing/selftests/timers/skew_consistency.c
index 2a996e072259..022b711c78ee 100644
--- a/tools/testing/selftests/timers/skew_consistency.c
+++ b/tools/testing/selftests/timers/skew_consistency.c
@@ -35,18 +35,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <sys/wait.h>
-#ifdef KTEST
 #include "../kselftest.h"
-#else
-static inline int ksft_exit_pass(void)
-{
-	exit(0);
-}
-static inline int ksft_exit_fail(void)
-{
-	exit(1);
-}
-#endif
 
 #define NSEC_PER_SEC 1000000000LL
 
diff --git a/tools/testing/selftests/timers/threadtest.c b/tools/testing/selftests/timers/threadtest.c
index e632e116f05e..759c9c06f1a0 100644
--- a/tools/testing/selftests/timers/threadtest.c
+++ b/tools/testing/selftests/timers/threadtest.c
@@ -21,19 +21,7 @@
 #include <stdlib.h>
 #include <sys/time.h>
 #include <pthread.h>
-#ifdef KTEST
 #include "../kselftest.h"
-#else
-static inline int ksft_exit_pass(void)
-{
-	exit(0);
-}
-static inline int ksft_exit_fail(void)
-{
-	exit(1);
-}
-#endif
-
 
 /* serializes shared list access */
 pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
diff --git a/tools/testing/selftests/timers/valid-adjtimex.c b/tools/testing/selftests/timers/valid-adjtimex.c
index 60fe3c569bd9..d9d3ab93b31a 100644
--- a/tools/testing/selftests/timers/valid-adjtimex.c
+++ b/tools/testing/selftests/timers/valid-adjtimex.c
@@ -32,18 +32,7 @@
 #include <string.h>
 #include <signal.h>
 #include <unistd.h>
-#ifdef KTEST
 #include "../kselftest.h"
-#else
-static inline int ksft_exit_pass(void)
-{
-	exit(0);
-}
-static inline int ksft_exit_fail(void)
-{
-	exit(1);
-}
-#endif
 
 #define NSEC_PER_SEC 1000000000LL
 #define USEC_PER_SEC 1000000LL
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 1eae79ae5b4e..a2c53a3d223d 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -66,6 +66,8 @@
 #include <sys/wait.h>
 #include <pthread.h>
 #include <linux/userfaultfd.h>
+#include <setjmp.h>
+#include <stdbool.h>
 
 #ifdef __NR_userfaultfd
 
@@ -82,11 +84,17 @@ static int bounces;
 #define TEST_SHMEM	3
 static int test_type;
 
+/* exercise the test_uffdio_*_eexist every ALARM_INTERVAL_SECS */
+#define ALARM_INTERVAL_SECS 10
+static volatile bool test_uffdio_copy_eexist = true;
+static volatile bool test_uffdio_zeropage_eexist = true;
+
+static bool map_shared;
 static int huge_fd;
 static char *huge_fd_off0;
 static unsigned long long *count_verify;
 static int uffd, uffd_flags, finished, *pipefd;
-static char *area_src, *area_dst;
+static char *area_src, *area_src_alias, *area_dst, *area_dst_alias;
 static char *zeropage;
 pthread_attr_t attr;
 
@@ -125,6 +133,9 @@ static void anon_allocate_area(void **alloc_area)
 	}
 }
 
+static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset)
+{
+}
 
 /* HugeTLB memory */
 static int hugetlb_release_pages(char *rel_area)
@@ -145,17 +156,51 @@ static int hugetlb_release_pages(char *rel_area)
 
 static void hugetlb_allocate_area(void **alloc_area)
 {
+	void *area_alias = NULL;
+	char **alloc_area_alias;
 	*alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
-				MAP_PRIVATE | MAP_HUGETLB, huge_fd,
-				*alloc_area == area_src ? 0 :
-				nr_pages * page_size);
+			   (map_shared ? MAP_SHARED : MAP_PRIVATE) |
+			   MAP_HUGETLB,
+			   huge_fd, *alloc_area == area_src ? 0 :
+			   nr_pages * page_size);
 	if (*alloc_area == MAP_FAILED) {
 		fprintf(stderr, "mmap of hugetlbfs file failed\n");
 		*alloc_area = NULL;
 	}
 
-	if (*alloc_area == area_src)
+	if (map_shared) {
+		area_alias = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
+				  MAP_SHARED | MAP_HUGETLB,
+				  huge_fd, *alloc_area == area_src ? 0 :
+				  nr_pages * page_size);
+		if (area_alias == MAP_FAILED) {
+			if (munmap(*alloc_area, nr_pages * page_size) < 0)
+				perror("hugetlb munmap"), exit(1);
+			*alloc_area = NULL;
+			return;
+		}
+	}
+	if (*alloc_area == area_src) {
 		huge_fd_off0 = *alloc_area;
+		alloc_area_alias = &area_src_alias;
+	} else {
+		alloc_area_alias = &area_dst_alias;
+	}
+	if (area_alias)
+		*alloc_area_alias = area_alias;
+}
+
+static void hugetlb_alias_mapping(__u64 *start, size_t len, unsigned long offset)
+{
+	if (!map_shared)
+		return;
+	/*
+	 * We can't zap just the pagetable with hugetlbfs because
+	 * MADV_DONTEED won't work. So exercise -EEXIST on a alias
+	 * mapping where the pagetables are not established initially,
+	 * this way we'll exercise the -EEXEC at the fs level.
+	 */
+	*start = (unsigned long) area_dst_alias + offset;
 }
 
 /* Shared memory */
@@ -185,6 +230,7 @@ struct uffd_test_ops {
 	unsigned long expected_ioctls;
 	void (*allocate_area)(void **alloc_area);
 	int (*release_pages)(char *rel_area);
+	void (*alias_mapping)(__u64 *start, size_t len, unsigned long offset);
 };
 
 #define ANON_EXPECTED_IOCTLS		((1 << _UFFDIO_WAKE) | \
@@ -195,18 +241,21 @@ static struct uffd_test_ops anon_uffd_test_ops = {
 	.expected_ioctls = ANON_EXPECTED_IOCTLS,
 	.allocate_area	= anon_allocate_area,
 	.release_pages	= anon_release_pages,
+	.alias_mapping = noop_alias_mapping,
 };
 
 static struct uffd_test_ops shmem_uffd_test_ops = {
-	.expected_ioctls = UFFD_API_RANGE_IOCTLS_BASIC,
+	.expected_ioctls = ANON_EXPECTED_IOCTLS,
 	.allocate_area	= shmem_allocate_area,
 	.release_pages	= shmem_release_pages,
+	.alias_mapping = noop_alias_mapping,
 };
 
 static struct uffd_test_ops hugetlb_uffd_test_ops = {
 	.expected_ioctls = UFFD_API_RANGE_IOCTLS_BASIC,
 	.allocate_area	= hugetlb_allocate_area,
 	.release_pages	= hugetlb_release_pages,
+	.alias_mapping = hugetlb_alias_mapping,
 };
 
 static struct uffd_test_ops *uffd_test_ops;
@@ -331,6 +380,23 @@ static void *locking_thread(void *arg)
 	return NULL;
 }
 
+static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy,
+			    unsigned long offset)
+{
+	uffd_test_ops->alias_mapping(&uffdio_copy->dst,
+				     uffdio_copy->len,
+				     offset);
+	if (ioctl(ufd, UFFDIO_COPY, uffdio_copy)) {
+		/* real retval in ufdio_copy.copy */
+		if (uffdio_copy->copy != -EEXIST)
+			fprintf(stderr, "UFFDIO_COPY retry error %Ld\n",
+				uffdio_copy->copy), exit(1);
+	} else {
+		fprintf(stderr,	"UFFDIO_COPY retry unexpected %Ld\n",
+			uffdio_copy->copy), exit(1);
+	}
+}
+
 static int copy_page(int ufd, unsigned long offset)
 {
 	struct uffdio_copy uffdio_copy;
@@ -351,8 +417,13 @@ static int copy_page(int ufd, unsigned long offset)
 	} else if (uffdio_copy.copy != page_size) {
 		fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n",
 			uffdio_copy.copy), exit(1);
-	} else
+	} else {
+		if (test_uffdio_copy_eexist) {
+			test_uffdio_copy_eexist = false;
+			retry_copy_page(ufd, &uffdio_copy, offset);
+		}
 		return 1;
+	}
 	return 0;
 }
 
@@ -408,6 +479,7 @@ static void *uffd_poll_thread(void *arg)
 				userfaults++;
 			break;
 		case UFFD_EVENT_FORK:
+			close(uffd);
 			uffd = msg.arg.fork.ufd;
 			pollfd[0].fd = uffd;
 			break;
@@ -572,6 +644,17 @@ static int userfaultfd_open(int features)
 	return 0;
 }
 
+sigjmp_buf jbuf, *sigbuf;
+
+static void sighndl(int sig, siginfo_t *siginfo, void *ptr)
+{
+	if (sig == SIGBUS) {
+		if (sigbuf)
+			siglongjmp(*sigbuf, 1);
+		abort();
+	}
+}
+
 /*
  * For non-cooperative userfaultfd test we fork() a process that will
  * generate pagefaults, will mremap the area monitored by the
@@ -585,19 +668,59 @@ static int userfaultfd_open(int features)
  * The release of the pages currently generates event for shmem and
  * anonymous memory (UFFD_EVENT_REMOVE), hence it is not checked
  * for hugetlb.
+ * For signal test(UFFD_FEATURE_SIGBUS), signal_test = 1, we register
+ * monitored area, generate pagefaults and test that signal is delivered.
+ * Use UFFDIO_COPY to allocate missing page and retry. For signal_test = 2
+ * test robustness use case - we release monitored area, fork a process
+ * that will generate pagefaults and verify signal is generated.
+ * This also tests UFFD_FEATURE_EVENT_FORK event along with the signal
+ * feature. Using monitor thread, verify no userfault events are generated.
  */
-static int faulting_process(void)
+static int faulting_process(int signal_test)
 {
 	unsigned long nr;
 	unsigned long long count;
 	unsigned long split_nr_pages;
+	unsigned long lastnr;
+	struct sigaction act;
+	unsigned long signalled = 0;
 
 	if (test_type != TEST_HUGETLB)
 		split_nr_pages = (nr_pages + 1) / 2;
 	else
 		split_nr_pages = nr_pages;
 
+	if (signal_test) {
+		sigbuf = &jbuf;
+		memset(&act, 0, sizeof(act));
+		act.sa_sigaction = sighndl;
+		act.sa_flags = SA_SIGINFO;
+		if (sigaction(SIGBUS, &act, 0)) {
+			perror("sigaction");
+			return 1;
+		}
+		lastnr = (unsigned long)-1;
+	}
+
 	for (nr = 0; nr < split_nr_pages; nr++) {
+		if (signal_test) {
+			if (sigsetjmp(*sigbuf, 1) != 0) {
+				if (nr == lastnr) {
+					fprintf(stderr, "Signal repeated\n");
+					return 1;
+				}
+
+				lastnr = nr;
+				if (signal_test == 1) {
+					if (copy_page(uffd, nr * page_size))
+						signalled++;
+				} else {
+					signalled++;
+					continue;
+				}
+			}
+		}
+
 		count = *area_count(area_dst, nr);
 		if (count != count_verify[nr]) {
 			fprintf(stderr,
@@ -607,6 +730,9 @@ static int faulting_process(void)
 		}
 	}
 
+	if (signal_test)
+		return signalled != split_nr_pages;
+
 	if (test_type == TEST_HUGETLB)
 		return 0;
 
@@ -636,6 +762,23 @@ static int faulting_process(void)
 	return 0;
 }
 
+static void retry_uffdio_zeropage(int ufd,
+				  struct uffdio_zeropage *uffdio_zeropage,
+				  unsigned long offset)
+{
+	uffd_test_ops->alias_mapping(&uffdio_zeropage->range.start,
+				     uffdio_zeropage->range.len,
+				     offset);
+	if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) {
+		if (uffdio_zeropage->zeropage != -EEXIST)
+			fprintf(stderr, "UFFDIO_ZEROPAGE retry error %Ld\n",
+				uffdio_zeropage->zeropage), exit(1);
+	} else {
+		fprintf(stderr, "UFFDIO_ZEROPAGE retry unexpected %Ld\n",
+			uffdio_zeropage->zeropage), exit(1);
+	}
+}
+
 static int uffdio_zeropage(int ufd, unsigned long offset)
 {
 	struct uffdio_zeropage uffdio_zeropage;
@@ -670,8 +813,14 @@ static int uffdio_zeropage(int ufd, unsigned long offset)
 		if (uffdio_zeropage.zeropage != page_size) {
 			fprintf(stderr, "UFFDIO_ZEROPAGE unexpected %Ld\n",
 				uffdio_zeropage.zeropage), exit(1);
-		} else
+		} else {
+			if (test_uffdio_zeropage_eexist) {
+				test_uffdio_zeropage_eexist = false;
+				retry_uffdio_zeropage(ufd, &uffdio_zeropage,
+						      offset);
+			}
 			return 1;
+		}
 	} else {
 		fprintf(stderr,
 			"UFFDIO_ZEROPAGE succeeded %Ld\n",
@@ -761,7 +910,7 @@ static int userfaultfd_events_test(void)
 		perror("fork"), exit(1);
 
 	if (!pid)
-		return faulting_process();
+		return faulting_process(0);
 
 	waitpid(pid, &err, 0);
 	if (err)
@@ -778,6 +927,72 @@ static int userfaultfd_events_test(void)
 	return userfaults != nr_pages;
 }
 
+static int userfaultfd_sig_test(void)
+{
+	struct uffdio_register uffdio_register;
+	unsigned long expected_ioctls;
+	unsigned long userfaults;
+	pthread_t uffd_mon;
+	int err, features;
+	pid_t pid;
+	char c;
+
+	printf("testing signal delivery: ");
+	fflush(stdout);
+
+	if (uffd_test_ops->release_pages(area_dst))
+		return 1;
+
+	features = UFFD_FEATURE_EVENT_FORK|UFFD_FEATURE_SIGBUS;
+	if (userfaultfd_open(features) < 0)
+		return 1;
+	fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
+
+	uffdio_register.range.start = (unsigned long) area_dst;
+	uffdio_register.range.len = nr_pages * page_size;
+	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
+		fprintf(stderr, "register failure\n"), exit(1);
+
+	expected_ioctls = uffd_test_ops->expected_ioctls;
+	if ((uffdio_register.ioctls & expected_ioctls) !=
+	    expected_ioctls)
+		fprintf(stderr,
+			"unexpected missing ioctl for anon memory\n"),
+			exit(1);
+
+	if (faulting_process(1))
+		fprintf(stderr, "faulting process failed\n"), exit(1);
+
+	if (uffd_test_ops->release_pages(area_dst))
+		return 1;
+
+	if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, NULL))
+		perror("uffd_poll_thread create"), exit(1);
+
+	pid = fork();
+	if (pid < 0)
+		perror("fork"), exit(1);
+
+	if (!pid)
+		exit(faulting_process(2));
+
+	waitpid(pid, &err, 0);
+	if (err)
+		fprintf(stderr, "faulting process failed\n"), exit(1);
+
+	if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
+		perror("pipe write"), exit(1);
+	if (pthread_join(uffd_mon, (void **)&userfaults))
+		return 1;
+
+	printf("done.\n");
+	if (userfaults)
+		fprintf(stderr, "Signal test failed, userfaults: %ld\n",
+			userfaults);
+	close(uffd);
+	return userfaults != 0;
+}
 static int userfaultfd_stress(void)
 {
 	void *area;
@@ -879,6 +1094,15 @@ static int userfaultfd_stress(void)
 			return 1;
 		}
 
+		if (area_dst_alias) {
+			uffdio_register.range.start = (unsigned long)
+				area_dst_alias;
+			if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
+				fprintf(stderr, "register failure alias\n");
+				return 1;
+			}
+		}
+
 		/*
 		 * The madvise done previously isn't enough: some
 		 * uffd_thread could have read userfaults (one of
@@ -912,9 +1136,17 @@ static int userfaultfd_stress(void)
 
 		/* unregister */
 		if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range)) {
-			fprintf(stderr, "register failure\n");
+			fprintf(stderr, "unregister failure\n");
 			return 1;
 		}
+		if (area_dst_alias) {
+			uffdio_register.range.start = (unsigned long) area_dst;
+			if (ioctl(uffd, UFFDIO_UNREGISTER,
+				  &uffdio_register.range)) {
+				fprintf(stderr, "unregister failure alias\n");
+				return 1;
+			}
+		}
 
 		/* verification */
 		if (bounces & BOUNCE_VERIFY) {
@@ -936,6 +1168,10 @@ static int userfaultfd_stress(void)
 		area_src = area_dst;
 		area_dst = tmp_area;
 
+		tmp_area = area_src_alias;
+		area_src_alias = area_dst_alias;
+		area_dst_alias = tmp_area;
+
 		printf("userfaults:");
 		for (cpu = 0; cpu < nr_cpus; cpu++)
 			printf(" %lu", userfaults[cpu]);
@@ -946,7 +1182,8 @@ static int userfaultfd_stress(void)
 		return err;
 
 	close(uffd);
-	return userfaultfd_zeropage_test() || userfaultfd_events_test();
+	return userfaultfd_zeropage_test() || userfaultfd_sig_test()
+		|| userfaultfd_events_test();
 }
 
 /*
@@ -981,7 +1218,12 @@ static void set_test_type(const char *type)
 	} else if (!strcmp(type, "hugetlb")) {
 		test_type = TEST_HUGETLB;
 		uffd_test_ops = &hugetlb_uffd_test_ops;
+	} else if (!strcmp(type, "hugetlb_shared")) {
+		map_shared = true;
+		test_type = TEST_HUGETLB;
+		uffd_test_ops = &hugetlb_uffd_test_ops;
 	} else if (!strcmp(type, "shmem")) {
+		map_shared = true;
 		test_type = TEST_SHMEM;
 		uffd_test_ops = &shmem_uffd_test_ops;
 	} else {
@@ -1001,12 +1243,25 @@ static void set_test_type(const char *type)
 		fprintf(stderr, "Impossible to run this test\n"), exit(2);
 }
 
+static void sigalrm(int sig)
+{
+	if (sig != SIGALRM)
+		abort();
+	test_uffdio_copy_eexist = true;
+	test_uffdio_zeropage_eexist = true;
+	alarm(ALARM_INTERVAL_SECS);
+}
+
 int main(int argc, char **argv)
 {
 	if (argc < 4)
 		fprintf(stderr, "Usage: <test type> <MiB> <bounces> [hugetlbfs_file]\n"),
 				exit(1);
 
+	if (signal(SIGALRM, sigalrm) == SIG_ERR)
+		fprintf(stderr, "failed to arm SIGALRM"), exit(1);
+	alarm(ALARM_INTERVAL_SECS);
+
 	set_test_type(argv[1]);
 
 	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
diff --git a/tools/testing/selftests/watchdog/Makefile b/tools/testing/selftests/watchdog/Makefile
index f863c664e3d1..ee068511fd0b 100644
--- a/tools/testing/selftests/watchdog/Makefile
+++ b/tools/testing/selftests/watchdog/Makefile
@@ -1,8 +1,3 @@
-TEST_PROGS := watchdog-test
-
-all: $(TEST_PROGS)
+TEST_GEN_PROGS := watchdog-test
 
 include ../lib.mk
-
-clean:
-	rm -fr $(TEST_PROGS)
diff --git a/tools/testing/selftests/watchdog/watchdog-test.c b/tools/testing/selftests/watchdog/watchdog-test.c
index a74c9d739d07..a1391be2dc1e 100644
--- a/tools/testing/selftests/watchdog/watchdog-test.c
+++ b/tools/testing/selftests/watchdog/watchdog-test.c
@@ -9,12 +9,25 @@
 #include <unistd.h>
 #include <fcntl.h>
 #include <signal.h>
+#include <getopt.h>
 #include <sys/ioctl.h>
 #include <linux/types.h>
 #include <linux/watchdog.h>
 
+#define DEFAULT_PING_RATE	1
+
 int fd;
 const char v = 'V';
+static const char sopts[] = "bdehp:t:";
+static const struct option lopts[] = {
+	{"bootstatus",          no_argument, NULL, 'b'},
+	{"disable",             no_argument, NULL, 'd'},
+	{"enable",              no_argument, NULL, 'e'},
+	{"help",                no_argument, NULL, 'h'},
+	{"pingrate",      required_argument, NULL, 'p'},
+	{"timeout",       required_argument, NULL, 't'},
+	{NULL,                  no_argument, NULL, 0x0}
+};
 
 /*
  * This function simply sends an IOCTL to the driver, which in turn ticks
@@ -23,12 +36,12 @@ const char v = 'V';
  */
 static void keep_alive(void)
 {
-    int dummy;
-    int ret;
+	int dummy;
+	int ret;
 
-    ret = ioctl(fd, WDIOC_KEEPALIVE, &dummy);
-    if (!ret)
-        printf(".");
+	ret = ioctl(fd, WDIOC_KEEPALIVE, &dummy);
+	if (!ret)
+		printf(".");
 }
 
 /*
@@ -38,75 +51,110 @@ static void keep_alive(void)
 
 static void term(int sig)
 {
-    int ret = write(fd, &v, 1);
-
-    close(fd);
-    if (ret < 0)
-	printf("\nStopping watchdog ticks failed (%d)...\n", errno);
-    else
-	printf("\nStopping watchdog ticks...\n");
-    exit(0);
+	int ret = write(fd, &v, 1);
+
+	close(fd);
+	if (ret < 0)
+		printf("\nStopping watchdog ticks failed (%d)...\n", errno);
+	else
+		printf("\nStopping watchdog ticks...\n");
+	exit(0);
+}
+
+static void usage(char *progname)
+{
+	printf("Usage: %s [options]\n", progname);
+	printf(" -b, --bootstatus    Get last boot status (Watchdog/POR)\n");
+	printf(" -d, --disable       Turn off the watchdog timer\n");
+	printf(" -e, --enable        Turn on the watchdog timer\n");
+	printf(" -h, --help          Print the help message\n");
+	printf(" -p, --pingrate=P    Set ping rate to P seconds (default %d)\n", DEFAULT_PING_RATE);
+	printf(" -t, --timeout=T     Set timeout to T seconds\n");
+	printf("\n");
+	printf("Parameters are parsed left-to-right in real-time.\n");
+	printf("Example: %s -d -t 10 -p 5 -e\n", progname);
 }
 
 int main(int argc, char *argv[])
 {
-    int flags;
-    unsigned int ping_rate = 1;
-    int ret;
-    int i;
-
-    setbuf(stdout, NULL);
-
-    fd = open("/dev/watchdog", O_WRONLY);
-
-    if (fd == -1) {
-	printf("Watchdog device not enabled.\n");
-	exit(-1);
-    }
-
-    for (i = 1; i < argc; i++) {
-        if (!strncasecmp(argv[i], "-d", 2)) {
-            flags = WDIOS_DISABLECARD;
-            ret = ioctl(fd, WDIOC_SETOPTIONS, &flags);
-            if (!ret)
-                printf("Watchdog card disabled.\n");
-        } else if (!strncasecmp(argv[i], "-e", 2)) {
-            flags = WDIOS_ENABLECARD;
-            ret = ioctl(fd, WDIOC_SETOPTIONS, &flags);
-            if (!ret)
-                printf("Watchdog card enabled.\n");
-        } else if (!strncasecmp(argv[i], "-t", 2) && argv[2]) {
-            flags = atoi(argv[i + 1]);
-            ret = ioctl(fd, WDIOC_SETTIMEOUT, &flags);
-            if (!ret)
-                printf("Watchdog timeout set to %u seconds.\n", flags);
-            i++;
-        } else if (!strncasecmp(argv[i], "-p", 2) && argv[2]) {
-            ping_rate = strtoul(argv[i + 1], NULL, 0);
-            printf("Watchdog ping rate set to %u seconds.\n", ping_rate);
-            i++;
-        } else {
-            printf("-d to disable, -e to enable, -t <n> to set "
-                "the timeout,\n-p <n> to set the ping rate, and ");
-            printf("run by itself to tick the card.\n");
-            printf("Parameters are parsed left-to-right in real-time.\n");
-            printf("Example: %s -d -t 10 -p 5 -e\n", argv[0]);
-            goto end;
-        }
-    }
-
-    printf("Watchdog Ticking Away!\n");
-
-    signal(SIGINT, term);
-
-    while(1) {
-	keep_alive();
-	sleep(ping_rate);
-    }
+	int flags;
+	unsigned int ping_rate = DEFAULT_PING_RATE;
+	int ret;
+	int c;
+	int oneshot = 0;
+
+	setbuf(stdout, NULL);
+
+	fd = open("/dev/watchdog", O_WRONLY);
+
+	if (fd == -1) {
+		printf("Watchdog device not enabled.\n");
+		exit(-1);
+	}
+
+	while ((c = getopt_long(argc, argv, sopts, lopts, NULL)) != -1) {
+		switch (c) {
+		case 'b':
+			flags = 0;
+			oneshot = 1;
+			ret = ioctl(fd, WDIOC_GETBOOTSTATUS, &flags);
+			if (!ret)
+				printf("Last boot is caused by: %s.\n", (flags != 0) ?
+					"Watchdog" : "Power-On-Reset");
+			else
+				printf("WDIOC_GETBOOTSTATUS errno '%s'\n", strerror(errno));
+			break;
+		case 'd':
+			flags = WDIOS_DISABLECARD;
+			ret = ioctl(fd, WDIOC_SETOPTIONS, &flags);
+			if (!ret)
+				printf("Watchdog card disabled.\n");
+			else
+				printf("WDIOS_DISABLECARD errno '%s'\n", strerror(errno));
+			break;
+		case 'e':
+			flags = WDIOS_ENABLECARD;
+			ret = ioctl(fd, WDIOC_SETOPTIONS, &flags);
+			if (!ret)
+				printf("Watchdog card enabled.\n");
+			else
+				printf("WDIOS_ENABLECARD errno '%s'\n", strerror(errno));
+			break;
+		case 'p':
+			ping_rate = strtoul(optarg, NULL, 0);
+			if (!ping_rate)
+				ping_rate = DEFAULT_PING_RATE;
+			printf("Watchdog ping rate set to %u seconds.\n", ping_rate);
+			break;
+		case 't':
+			flags = strtoul(optarg, NULL, 0);
+			ret = ioctl(fd, WDIOC_SETTIMEOUT, &flags);
+			if (!ret)
+				printf("Watchdog timeout set to %u seconds.\n", flags);
+			else
+				printf("WDIOC_SETTIMEOUT errno '%s'\n", strerror(errno));
+			break;
+		default:
+			usage(argv[0]);
+			goto end;
+		}
+	}
+
+	if (oneshot)
+		goto end;
+
+	printf("Watchdog Ticking Away!\n");
+
+	signal(SIGINT, term);
+
+	while (1) {
+		keep_alive();
+		sleep(ping_rate);
+	}
 end:
-    ret = write(fd, &v, 1);
-    if (ret < 0)
-	printf("Stopping watchdog ticks failed (%d)...\n", errno);
-    close(fd);
-    return 0;
+	ret = write(fd, &v, 1);
+	if (ret < 0)
+		printf("Stopping watchdog ticks failed (%d)...\n", errno);
+	close(fd);
+	return 0;
 }
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 97f187e2663f..0a74a20ca32b 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -20,7 +20,7 @@ BINARIES_64 := $(TARGETS_C_64BIT_ALL:%=%_64)
 BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32))
 BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))
 
-CFLAGS := -O2 -g -std=gnu99 -pthread -Wall
+CFLAGS := -O2 -g -std=gnu99 -pthread -Wall -no-pie
 
 UNAME_M := $(shell uname -m)
 CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32)
diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c
index a8df159a8924..ec0f6b45ce8b 100644
--- a/tools/testing/selftests/x86/mpx-mini-test.c
+++ b/tools/testing/selftests/x86/mpx-mini-test.c
@@ -391,8 +391,7 @@ void handler(int signum, siginfo_t *si, void *vucontext)
 		br_count++;
 		dprintf1("#BR 0x%jx (total seen: %d)\n", status, br_count);
 
-#define __SI_FAULT      (3 << 16)
-#define SEGV_BNDERR     (__SI_FAULT|3)  /* failed address bound checks */
+#define SEGV_BNDERR     3  /* failed address bound checks */
 
 		dprintf2("Saw a #BR! status 0x%jx at %016lx br_reason: %jx\n",
 				status, ip, br_reason);
diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c
index 3237bc010e1c..23927845518d 100644
--- a/tools/testing/selftests/x86/protection_keys.c
+++ b/tools/testing/selftests/x86/protection_keys.c
@@ -212,19 +212,18 @@ void dump_mem(void *dumpme, int len_bytes)
 	}
 }
 
-#define __SI_FAULT      (3 << 16)
-#define SEGV_BNDERR     (__SI_FAULT|3)  /* failed address bound checks */
-#define SEGV_PKUERR     (__SI_FAULT|4)
+#define SEGV_BNDERR     3  /* failed address bound checks */
+#define SEGV_PKUERR     4
 
 static char *si_code_str(int si_code)
 {
-	if (si_code & SEGV_MAPERR)
+	if (si_code == SEGV_MAPERR)
 		return "SEGV_MAPERR";
-	if (si_code & SEGV_ACCERR)
+	if (si_code == SEGV_ACCERR)
 		return "SEGV_ACCERR";
-	if (si_code & SEGV_BNDERR)
+	if (si_code == SEGV_BNDERR)
 		return "SEGV_BNDERR";
-	if (si_code & SEGV_PKUERR)
+	if (si_code == SEGV_PKUERR)
 		return "SEGV_PKUERR";
 	return "UNKNOWN";
 }