From cf4a7207b1cb4a3c3fe3aa11a83c9d673722a7f5 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 22 Dec 2016 14:45:14 +0000
Subject: lib: Add a simple prime number generator

Prime numbers are interesting for testing components that use multiplies
and divides, such as testing DRM's struct drm_mm alignment computations.

v2: Move to lib/, add selftest
v3: Fix initial constants (exclude 0/1 from being primes)
v4: More RCU markup to keep 0day/sparse happy
v5: Fix RCU unwind on module exit, add to kselftests
v6: Tidy computation of bitmap size
v7: for_each_prime_number_from()
v8: Compose small-primes using BIT() for easier verification
v9: Move rcu dance entirely into callers.
v10: Improve quote for Betrand's Postulate (aka Chebyshev's theorem)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Lukas Wunner <lukas@wunner.de>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161222144514.3911-1-chris@chris-wilson.co.uk
---
 tools/testing/selftests/lib/prime_numbers.sh | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100755 tools/testing/selftests/lib/prime_numbers.sh

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/lib/prime_numbers.sh b/tools/testing/selftests/lib/prime_numbers.sh
new file mode 100755
index 000000000000..da4cbcd766f5
--- /dev/null
+++ b/tools/testing/selftests/lib/prime_numbers.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+# Checks fast/slow prime_number generation for inconsistencies
+
+if ! /sbin/modprobe -q -r prime_numbers; then
+	echo "prime_numbers: [SKIP]"
+	exit 77
+fi
+
+if /sbin/modprobe -q prime_numbers selftest=65536; then
+	/sbin/modprobe -q -r prime_numbers
+	echo "prime_numbers: ok"
+else
+	echo "prime_numbers: [FAIL]"
+	exit 1
+fi
-- 
cgit v1.2.3


From 50f0033d1a0f3a8e9eed09ab68067fbb57b0669d Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 22 Dec 2016 08:36:09 +0000
Subject: drm: Add some kselftests for the DRM range manager (struct drm_mm)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

First we introduce a smattering of infrastructure for writing selftests.
The idea is that we have a test module that exercises a particular
portion of the exported API, and that module provides a set of tests
that can either be run as an ensemble via kselftest or individually via
an igt harness (in this case igt/drm_mm). To accommodate selecting
individual tests, we export a boolean parameter to control selection of
each test - that is hidden inside a bunch of reusable boilerplate macros
to keep writing the tests simple.

v2: Choose a random random_seed unless one is specified by the user.
v3: More parameters to control max_iterations and max_prime of the
tests.

Testcase: igt/drm_mm
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161222083641.2691-7-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/Kconfig                       |  15 ++++
 drivers/gpu/drm/Makefile                      |   1 +
 drivers/gpu/drm/selftests/drm_mm_selftests.h  |   8 ++
 drivers/gpu/drm/selftests/drm_selftest.c      | 109 ++++++++++++++++++++++++++
 drivers/gpu/drm/selftests/drm_selftest.h      |  41 ++++++++++
 drivers/gpu/drm/selftests/test-drm_mm.c       |  58 ++++++++++++++
 tools/testing/selftests/drivers/gpu/drm_mm.sh |  15 ++++
 7 files changed, 247 insertions(+)
 create mode 100644 drivers/gpu/drm/selftests/drm_mm_selftests.h
 create mode 100644 drivers/gpu/drm/selftests/drm_selftest.c
 create mode 100644 drivers/gpu/drm/selftests/drm_selftest.h
 create mode 100644 drivers/gpu/drm/selftests/test-drm_mm.c
 create mode 100755 tools/testing/selftests/drivers/gpu/drm_mm.sh

(limited to 'tools/testing')

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 45a1c7468e88..29146fa83001 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -48,6 +48,21 @@ config DRM_DEBUG_MM
 
 	  If in doubt, say "N".
 
+config DRM_DEBUG_MM_SELFTEST
+	tristate "kselftests for DRM range manager (struct drm_mm)"
+	depends on DRM
+	depends on DEBUG_KERNEL
+	select PRIME_NUMBERS
+	select DRM_LIB_RANDOM
+	default n
+	help
+	  This option provides a kernel module that can be used to test
+	  the DRM range manager (drm_mm) and its API. This option is not
+	  useful for distributions or general kernels, but only for kernel
+	  developers working on DRM and associated drivers.
+
+	  If in doubt, say "N".
+
 config DRM_KMS_HELPER
 	tristate
 	depends on DRM
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 6bb416360ae4..c0d1aed8588b 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -38,6 +38,7 @@ drm_kms_helper-$(CONFIG_DRM_KMS_CMA_HELPER) += drm_fb_cma_helper.o
 drm_kms_helper-$(CONFIG_DRM_DP_AUX_CHARDEV) += drm_dp_aux_dev.o
 
 obj-$(CONFIG_DRM_KMS_HELPER) += drm_kms_helper.o
+obj-$(CONFIG_DRM_DEBUG_MM_SELFTEST) += selftests/test-drm_mm.o
 
 CFLAGS_drm_trace_points.o := -I$(src)
 
diff --git a/drivers/gpu/drm/selftests/drm_mm_selftests.h b/drivers/gpu/drm/selftests/drm_mm_selftests.h
new file mode 100644
index 000000000000..1610e0a63a5b
--- /dev/null
+++ b/drivers/gpu/drm/selftests/drm_mm_selftests.h
@@ -0,0 +1,8 @@
+/* List each unit test as selftest(name, function)
+ *
+ * The name is used as both an enum and expanded as igt__name to create
+ * a module parameter. It must be unique and legal for a C identifier.
+ *
+ * Tests are executed in order by igt/drm_mm
+ */
+selftest(sanitycheck, igt_sanitycheck) /* keep first (selfcheck for igt) */
diff --git a/drivers/gpu/drm/selftests/drm_selftest.c b/drivers/gpu/drm/selftests/drm_selftest.c
new file mode 100644
index 000000000000..e29ed9faef5b
--- /dev/null
+++ b/drivers/gpu/drm/selftests/drm_selftest.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/compiler.h>
+
+#define selftest(name, func) __idx_##name,
+enum {
+#include TESTS
+};
+#undef selftest
+
+#define selftest(n, f) [__idx_##n] = { .name = #n, .func = f },
+static struct drm_selftest {
+	bool enabled;
+	const char *name;
+	int (*func)(void *);
+} selftests[] = {
+#include TESTS
+};
+#undef selftest
+
+/* Embed the line number into the parameter name so that we can order tests */
+#define param(n) __PASTE(igt__, __PASTE(__PASTE(__LINE__, __), n))
+#define selftest_0(n, func, id) \
+module_param_named(id, selftests[__idx_##n].enabled, bool, 0400);
+#define selftest(n, func) selftest_0(n, func, param(n))
+#include TESTS
+#undef selftest
+
+static void set_default_test_all(struct drm_selftest *st, unsigned long count)
+{
+	unsigned long i;
+
+	for (i = 0; i < count; i++)
+		if (st[i].enabled)
+			return;
+
+	for (i = 0; i < count; i++)
+		st[i].enabled = true;
+}
+
+static int run_selftests(struct drm_selftest *st,
+			 unsigned long count,
+			 void *data)
+{
+	int err = 0;
+
+	set_default_test_all(st, count);
+
+	/* Tests are listed in natural order in drm_*_selftests.h */
+	for (; count--; st++) {
+		if (!st->enabled)
+			continue;
+
+		pr_debug("drm: Running %s\n", st->name);
+		err = st->func(data);
+		if (err)
+			break;
+	}
+
+	if (WARN(err > 0 || err == -ENOTTY,
+		 "%s returned %d, conflicting with selftest's magic values!\n",
+		 st->name, err))
+		err = -1;
+
+	rcu_barrier();
+	return err;
+}
+
+static int __maybe_unused
+__drm_subtests(const char *caller,
+	       const struct drm_subtest *st,
+	       int count,
+	       void *data)
+{
+	int err;
+
+	for (; count--; st++) {
+		pr_debug("Running %s/%s\n", caller, st->name);
+		err = st->func(data);
+		if (err) {
+			pr_err("%s: %s failed with error %d\n",
+			       caller, st->name, err);
+			return err;
+		}
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/selftests/drm_selftest.h b/drivers/gpu/drm/selftests/drm_selftest.h
new file mode 100644
index 000000000000..c784ec02ff53
--- /dev/null
+++ b/drivers/gpu/drm/selftests/drm_selftest.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __DRM_SELFTEST_H__
+#define __DRM_SELFTEST_H__
+
+struct drm_subtest {
+	int (*func)(void *data);
+	const char *name;
+};
+
+static int __drm_subtests(const char *caller,
+			  const struct drm_subtest *st,
+			  int count,
+			  void *data);
+#define drm_subtests(T, data) \
+	__drm_subtests(__func__, T, ARRAY_SIZE(T), data)
+
+#define SUBTEST(x) { x, #x }
+
+#endif /* __DRM_SELFTEST_H__ */
diff --git a/drivers/gpu/drm/selftests/test-drm_mm.c b/drivers/gpu/drm/selftests/test-drm_mm.c
new file mode 100644
index 000000000000..682f5f86f465
--- /dev/null
+++ b/drivers/gpu/drm/selftests/test-drm_mm.c
@@ -0,0 +1,58 @@
+/*
+ * Test cases for the drm_mm range manager
+ */
+
+#define pr_fmt(fmt) "drm_mm: " fmt
+
+#include <linux/module.h>
+#include <linux/prime_numbers.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/vmalloc.h>
+
+#include <drm/drm_mm.h>
+
+#include "../lib/drm_random.h"
+
+#define TESTS "drm_mm_selftests.h"
+#include "drm_selftest.h"
+
+static unsigned int random_seed;
+static unsigned int max_iterations = 8192;
+static unsigned int max_prime = 128;
+
+static int igt_sanitycheck(void *ignored)
+{
+	pr_info("%s - ok!\n", __func__);
+	return 0;
+}
+
+#include "drm_selftest.c"
+
+static int __init test_drm_mm_init(void)
+{
+	int err;
+
+	while (!random_seed)
+		random_seed = get_random_int();
+
+	pr_info("Testing DRM range manger (struct drm_mm), with random_seed=0x%x max_iterations=%u max_prime=%u\n",
+		random_seed, max_iterations, max_prime);
+	err = run_selftests(selftests, ARRAY_SIZE(selftests), NULL);
+
+	return err > 0 ? 0 : err;
+}
+
+static void __exit test_drm_mm_exit(void)
+{
+}
+
+module_init(test_drm_mm_init);
+module_exit(test_drm_mm_exit);
+
+module_param(random_seed, uint, 0400);
+module_param(max_iterations, uint, 0400);
+module_param(max_prime, uint, 0400);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
diff --git a/tools/testing/selftests/drivers/gpu/drm_mm.sh b/tools/testing/selftests/drivers/gpu/drm_mm.sh
new file mode 100755
index 000000000000..96dd55c92799
--- /dev/null
+++ b/tools/testing/selftests/drivers/gpu/drm_mm.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+# Runs API tests for struct drm_mm (DRM range manager)
+
+if ! /sbin/modprobe -n -q test-drm_mm; then
+       echo "drivers/gpu/drm_mm: [skip]"
+       exit 77
+fi
+
+if /sbin/modprobe -q test-drm_mm; then
+       /sbin/modprobe -q -r test-drm_mm
+       echo "drivers/gpu/drm_mm: ok"
+else
+       echo "drivers/gpu/drm_mm: [FAIL]"
+       exit 1
+fi
-- 
cgit v1.2.3


From fe878cad38d389cf5dbcce501951b2269d6d02bf Mon Sep 17 00:00:00 2001
From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Tue, 3 Jan 2017 06:31:48 -0800
Subject: tools: test case for TPACKET_V3/TX_RING support

Add a test case and sample code for (TPACKET_V3, PACKET_TX_RING)

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/psock_tpacket.c | 91 +++++++++++++++++++++++------
 1 file changed, 74 insertions(+), 17 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c
index 24adf709bd9d..4a1bc648b35e 100644
--- a/tools/testing/selftests/net/psock_tpacket.c
+++ b/tools/testing/selftests/net/psock_tpacket.c
@@ -311,20 +311,33 @@ static inline void __v2_tx_user_ready(struct tpacket2_hdr *hdr)
 	__sync_synchronize();
 }
 
-static inline int __v1_v2_tx_kernel_ready(void *base, int version)
+static inline int __v3_tx_kernel_ready(struct tpacket3_hdr *hdr)
+{
+	return !(hdr->tp_status & (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING));
+}
+
+static inline void __v3_tx_user_ready(struct tpacket3_hdr *hdr)
+{
+	hdr->tp_status = TP_STATUS_SEND_REQUEST;
+	__sync_synchronize();
+}
+
+static inline int __tx_kernel_ready(void *base, int version)
 {
 	switch (version) {
 	case TPACKET_V1:
 		return __v1_tx_kernel_ready(base);
 	case TPACKET_V2:
 		return __v2_tx_kernel_ready(base);
+	case TPACKET_V3:
+		return __v3_tx_kernel_ready(base);
 	default:
 		bug_on(1);
 		return 0;
 	}
 }
 
-static inline void __v1_v2_tx_user_ready(void *base, int version)
+static inline void __tx_user_ready(void *base, int version)
 {
 	switch (version) {
 	case TPACKET_V1:
@@ -333,6 +346,9 @@ static inline void __v1_v2_tx_user_ready(void *base, int version)
 	case TPACKET_V2:
 		__v2_tx_user_ready(base);
 		break;
+	case TPACKET_V3:
+		__v3_tx_user_ready(base);
+		break;
 	}
 }
 
@@ -348,7 +364,22 @@ static void __v1_v2_set_packet_loss_discard(int sock)
 	}
 }
 
-static void walk_v1_v2_tx(int sock, struct ring *ring)
+static inline void *get_next_frame(struct ring *ring, int n)
+{
+	uint8_t *f0 = ring->rd[0].iov_base;
+
+	switch (ring->version) {
+	case TPACKET_V1:
+	case TPACKET_V2:
+		return ring->rd[n].iov_base;
+	case TPACKET_V3:
+		return f0 + (n * ring->req3.tp_frame_size);
+	default:
+		bug_on(1);
+	}
+}
+
+static void walk_tx(int sock, struct ring *ring)
 {
 	struct pollfd pfd;
 	int rcv_sock, ret;
@@ -360,9 +391,19 @@ static void walk_v1_v2_tx(int sock, struct ring *ring)
 		.sll_family = PF_PACKET,
 		.sll_halen = ETH_ALEN,
 	};
+	int nframes;
+
+	/* TPACKET_V{1,2} sets up the ring->rd* related variables based
+	 * on frames (e.g., rd_num is tp_frame_nr) whereas V3 sets these
+	 * up based on blocks (e.g, rd_num is  tp_block_nr)
+	 */
+	if (ring->version <= TPACKET_V2)
+		nframes = ring->rd_num;
+	else
+		nframes = ring->req3.tp_frame_nr;
 
 	bug_on(ring->type != PACKET_TX_RING);
-	bug_on(ring->rd_num < NUM_PACKETS);
+	bug_on(nframes < NUM_PACKETS);
 
 	rcv_sock = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
 	if (rcv_sock == -1) {
@@ -388,10 +429,11 @@ static void walk_v1_v2_tx(int sock, struct ring *ring)
 	create_payload(packet, &packet_len);
 
 	while (total_packets > 0) {
-		while (__v1_v2_tx_kernel_ready(ring->rd[frame_num].iov_base,
-					       ring->version) &&
+		void *next = get_next_frame(ring, frame_num);
+
+		while (__tx_kernel_ready(next, ring->version) &&
 		       total_packets > 0) {
-			ppd.raw = ring->rd[frame_num].iov_base;
+			ppd.raw = next;
 
 			switch (ring->version) {
 			case TPACKET_V1:
@@ -413,14 +455,27 @@ static void walk_v1_v2_tx(int sock, struct ring *ring)
 				       packet_len);
 				total_bytes += ppd.v2->tp_h.tp_snaplen;
 				break;
+			case TPACKET_V3: {
+				struct tpacket3_hdr *tx = next;
+
+				tx->tp_snaplen = packet_len;
+				tx->tp_len = packet_len;
+				tx->tp_next_offset = 0;
+
+				memcpy((uint8_t *)tx + TPACKET3_HDRLEN -
+				       sizeof(struct sockaddr_ll), packet,
+				       packet_len);
+				total_bytes += tx->tp_snaplen;
+				break;
+			}
 			}
 
 			status_bar_update();
 			total_packets--;
 
-			__v1_v2_tx_user_ready(ppd.raw, ring->version);
+			__tx_user_ready(next, ring->version);
 
-			frame_num = (frame_num + 1) % ring->rd_num;
+			frame_num = (frame_num + 1) % nframes;
 		}
 
 		poll(&pfd, 1, 1);
@@ -460,7 +515,7 @@ static void walk_v1_v2(int sock, struct ring *ring)
 	if (ring->type == PACKET_RX_RING)
 		walk_v1_v2_rx(sock, ring);
 	else
-		walk_v1_v2_tx(sock, ring);
+		walk_tx(sock, ring);
 }
 
 static uint64_t __v3_prev_block_seq_num = 0;
@@ -583,7 +638,7 @@ static void walk_v3(int sock, struct ring *ring)
 	if (ring->type == PACKET_RX_RING)
 		walk_v3_rx(sock, ring);
 	else
-		bug_on(1);
+		walk_tx(sock, ring);
 }
 
 static void __v1_v2_fill(struct ring *ring, unsigned int blocks)
@@ -602,12 +657,13 @@ static void __v1_v2_fill(struct ring *ring, unsigned int blocks)
 	ring->flen = ring->req.tp_frame_size;
 }
 
-static void __v3_fill(struct ring *ring, unsigned int blocks)
+static void __v3_fill(struct ring *ring, unsigned int blocks, int type)
 {
-	ring->req3.tp_retire_blk_tov = 64;
-	ring->req3.tp_sizeof_priv = 0;
-	ring->req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
-
+	if (type == PACKET_RX_RING) {
+		ring->req3.tp_retire_blk_tov = 64;
+		ring->req3.tp_sizeof_priv = 0;
+		ring->req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
+	}
 	ring->req3.tp_block_size = getpagesize() << 2;
 	ring->req3.tp_frame_size = TPACKET_ALIGNMENT << 7;
 	ring->req3.tp_block_nr = blocks;
@@ -641,7 +697,7 @@ static void setup_ring(int sock, struct ring *ring, int version, int type)
 		break;
 
 	case TPACKET_V3:
-		__v3_fill(ring, blocks);
+		__v3_fill(ring, blocks, type);
 		ret = setsockopt(sock, SOL_PACKET, type, &ring->req3,
 				 sizeof(ring->req3));
 		break;
@@ -796,6 +852,7 @@ int main(void)
 	ret |= test_tpacket(TPACKET_V2, PACKET_TX_RING);
 
 	ret |= test_tpacket(TPACKET_V3, PACKET_RX_RING);
+	ret |= test_tpacket(TPACKET_V3, PACKET_TX_RING);
 
 	if (ret)
 		return 1;
-- 
cgit v1.2.3


From 6606021401032919c559a829a5d273ba1741b434 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Mon, 26 Dec 2016 19:20:50 -0800
Subject: selftests/x86: Add a selftest for SYSRET to noncanonical addresses

SYSRET to a noncanonical address will blow up on Intel CPUs.  Linux
needs to prevent this from happening in two major cases, and the
criteria will become more complicated when support for larger virtual
address spaces is added.

A fast-path SYSCALL will fall through to the following instruction
using SYSRET without any particular checking.  To prevent fall-through
to a noncanonical address, Linux prevents the highest canonical page
from being mapped.  This test case checks a variety of possible maximum
addresses to make sure that either we can't map code there or that
SYSCALL fall-through works.

A slow-path system call can return anywhere.  Linux needs to make sure
that, if the return address is non-canonical, it won't use SYSRET.
This test cases causes sigreturn() to return to a variety of addresses
(with RCX == RIP) and makes sure that nothing explodes.

Some of this code comes from Kirill Shutemov.

Kirill reported the following output with 5-level paging enabled:

  [RUN]   sigreturn to 0x800000000000
  [OK]    Got SIGSEGV at RIP=0x800000000000
  [RUN]   sigreturn to 0x1000000000000
  [OK]    Got SIGSEGV at RIP=0x1000000000000
  [RUN]   sigreturn to 0x2000000000000
  [OK]    Got SIGSEGV at RIP=0x2000000000000
  [RUN]   sigreturn to 0x4000000000000
  [OK]    Got SIGSEGV at RIP=0x4000000000000
  [RUN]   sigreturn to 0x8000000000000
  [OK]    Got SIGSEGV at RIP=0x8000000000000
  [RUN]   sigreturn to 0x10000000000000
  [OK]    Got SIGSEGV at RIP=0x10000000000000
  [RUN]   sigreturn to 0x20000000000000
  [OK]    Got SIGSEGV at RIP=0x20000000000000
  [RUN]   sigreturn to 0x40000000000000
  [OK]    Got SIGSEGV at RIP=0x40000000000000
  [RUN]   sigreturn to 0x80000000000000
  [OK]    Got SIGSEGV at RIP=0x80000000000000
  [RUN]   sigreturn to 0x100000000000000
  [OK]    Got SIGSEGV at RIP=0x100000000000000
  [RUN]   sigreturn to 0x200000000000000
  [OK]    Got SIGSEGV at RIP=0x200000000000000
  [RUN]   sigreturn to 0x400000000000000
  [OK]    Got SIGSEGV at RIP=0x400000000000000
  [RUN]   sigreturn to 0x800000000000000
  [OK]    Got SIGSEGV at RIP=0x800000000000000
  [RUN]   sigreturn to 0x1000000000000000
  [OK]    Got SIGSEGV at RIP=0x1000000000000000
  [RUN]   sigreturn to 0x2000000000000000
  [OK]    Got SIGSEGV at RIP=0x2000000000000000
  [RUN]   sigreturn to 0x4000000000000000
  [OK]    Got SIGSEGV at RIP=0x4000000000000000
  [RUN]   sigreturn to 0x8000000000000000
  [OK]    Got SIGSEGV at RIP=0x8000000000000000
  [RUN]   Trying a SYSCALL that falls through to 0x7fffffffe000
  [OK]    We survived
  [RUN]   Trying a SYSCALL that falls through to 0x7ffffffff000
  [OK]    We survived
  [RUN]   Trying a SYSCALL that falls through to 0x800000000000
  [OK]    We survived
  [RUN]   Trying a SYSCALL that falls through to 0xfffffffff000
  [OK]    We survived
  [RUN]   Trying a SYSCALL that falls through to 0x1000000000000
  [OK]    We survived
  [RUN]   Trying a SYSCALL that falls through to 0x1fffffffff000
  [OK]    We survived
  [RUN]   Trying a SYSCALL that falls through to 0x2000000000000
  [OK]    We survived
  [RUN]   Trying a SYSCALL that falls through to 0x3fffffffff000
  [OK]    We survived
  [RUN]   Trying a SYSCALL that falls through to 0x4000000000000
  [OK]    We survived
  [RUN]   Trying a SYSCALL that falls through to 0x7fffffffff000
  [OK]    We survived
  [RUN]   Trying a SYSCALL that falls through to 0x8000000000000
  [OK]    We survived
  [RUN]   Trying a SYSCALL that falls through to 0xffffffffff000
  [OK]    We survived
  [RUN]   Trying a SYSCALL that falls through to 0x10000000000000
  [OK]    We survived
  [RUN]   Trying a SYSCALL that falls through to 0x1ffffffffff000
  [OK]    We survived
  [RUN]   Trying a SYSCALL that falls through to 0x20000000000000
  [OK]    We survived
  [RUN]   Trying a SYSCALL that falls through to 0x3ffffffffff000
  [OK]    We survived
  [RUN]   Trying a SYSCALL that falls through to 0x40000000000000
  [OK]    We survived
  [RUN]   Trying a SYSCALL that falls through to 0x7ffffffffff000
  [OK]    We survived
  [RUN]   Trying a SYSCALL that falls through to 0x80000000000000
  [OK]    We survived
  [RUN]   Trying a SYSCALL that falls through to 0xfffffffffff000
  [OK]    We survived
  [RUN]   Trying a SYSCALL that falls through to 0x100000000000000
  [OK]    mremap to 0xfffffffffff000 failed
  [RUN]   Trying a SYSCALL that falls through to 0x1fffffffffff000
  [OK]    mremap to 0x1ffffffffffe000 failed
  [RUN]   Trying a SYSCALL that falls through to 0x200000000000000
  [OK]    mremap to 0x1fffffffffff000 failed
  [RUN]   Trying a SYSCALL that falls through to 0x3fffffffffff000
  [OK]    mremap to 0x3ffffffffffe000 failed
  [RUN]   Trying a SYSCALL that falls through to 0x400000000000000
  [OK]    mremap to 0x3fffffffffff000 failed
  [RUN]   Trying a SYSCALL that falls through to 0x7fffffffffff000
  [OK]    mremap to 0x7ffffffffffe000 failed
  [RUN]   Trying a SYSCALL that falls through to 0x800000000000000
  [OK]    mremap to 0x7fffffffffff000 failed
  [RUN]   Trying a SYSCALL that falls through to 0xffffffffffff000
  [OK]    mremap to 0xfffffffffffe000 failed
  [RUN]   Trying a SYSCALL that falls through to 0x1000000000000000
  [OK]    mremap to 0xffffffffffff000 failed
  [RUN]   Trying a SYSCALL that falls through to 0x1ffffffffffff000
  [OK]    mremap to 0x1fffffffffffe000 failed
  [RUN]   Trying a SYSCALL that falls through to 0x2000000000000000
  [OK]    mremap to 0x1ffffffffffff000 failed
  [RUN]   Trying a SYSCALL that falls through to 0x3ffffffffffff000
  [OK]    mremap to 0x3fffffffffffe000 failed
  [RUN]   Trying a SYSCALL that falls through to 0x4000000000000000
  [OK]    mremap to 0x3ffffffffffff000 failed
  [RUN]   Trying a SYSCALL that falls through to 0x7ffffffffffff000
  [OK]    mremap to 0x7fffffffffffe000 failed
  [RUN]   Trying a SYSCALL that falls through to 0x8000000000000000
  [OK]    mremap to 0x7ffffffffffff000 failed

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Shuah Khan <shuahkh@osg.samsung.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/e70bd9a3f90657ba47b755100a20475d038fa26b.1482808435.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/testing/selftests/x86/Makefile     |   2 +-
 tools/testing/selftests/x86/sysret_rip.c | 195 +++++++++++++++++++++++++++++++
 2 files changed, 196 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/x86/sysret_rip.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 8c1cb423cfe6..25d4067c11e4 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -10,7 +10,7 @@ TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_sysc
 TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
 			test_FCMOV test_FCOMI test_FISTTP \
 			vdso_restorer
-TARGETS_C_64BIT_ONLY := fsgsbase
+TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip
 
 TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY)
 TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY)
diff --git a/tools/testing/selftests/x86/sysret_rip.c b/tools/testing/selftests/x86/sysret_rip.c
new file mode 100644
index 000000000000..d85ec5b3671c
--- /dev/null
+++ b/tools/testing/selftests/x86/sysret_rip.c
@@ -0,0 +1,195 @@
+/*
+ * sigreturn.c - tests that x86 avoids Intel SYSRET pitfalls
+ * Copyright (c) 2014-2016 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/signal.h>
+#include <sys/ucontext.h>
+#include <sys/syscall.h>
+#include <err.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <setjmp.h>
+#include <sys/user.h>
+#include <sys/mman.h>
+#include <assert.h>
+
+
+asm (
+	".pushsection \".text\", \"ax\"\n\t"
+	".balign 4096\n\t"
+	"test_page: .globl test_page\n\t"
+	".fill 4094,1,0xcc\n\t"
+	"test_syscall_insn:\n\t"
+	"syscall\n\t"
+	".ifne . - test_page - 4096\n\t"
+	".error \"test page is not one page long\"\n\t"
+	".endif\n\t"
+	".popsection"
+    );
+
+extern const char test_page[];
+static void const *current_test_page_addr = test_page;
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+		       int flags)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = handler;
+	sa.sa_flags = SA_SIGINFO | flags;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
+static void clearhandler(int sig)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_handler = SIG_DFL;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
+/* State used by our signal handlers. */
+static gregset_t initial_regs;
+
+static volatile unsigned long rip;
+
+static void sigsegv_for_sigreturn_test(int sig, siginfo_t *info, void *ctx_void)
+{
+	ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+	if (rip != ctx->uc_mcontext.gregs[REG_RIP]) {
+		printf("[FAIL]\tRequested RIP=0x%lx but got RIP=0x%lx\n",
+		       rip, (unsigned long)ctx->uc_mcontext.gregs[REG_RIP]);
+		fflush(stdout);
+		_exit(1);
+	}
+
+	memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t));
+
+	printf("[OK]\tGot SIGSEGV at RIP=0x%lx\n", rip);
+}
+
+static void sigusr1(int sig, siginfo_t *info, void *ctx_void)
+{
+	ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+	memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
+
+	/* Set IP and CX to match so that SYSRET can happen. */
+	ctx->uc_mcontext.gregs[REG_RIP] = rip;
+	ctx->uc_mcontext.gregs[REG_RCX] = rip;
+
+	/* R11 and EFLAGS should already match. */
+	assert(ctx->uc_mcontext.gregs[REG_EFL] ==
+	       ctx->uc_mcontext.gregs[REG_R11]);
+
+	sethandler(SIGSEGV, sigsegv_for_sigreturn_test, SA_RESETHAND);
+
+	return;
+}
+
+static void test_sigreturn_to(unsigned long ip)
+{
+	rip = ip;
+	printf("[RUN]\tsigreturn to 0x%lx\n", ip);
+	raise(SIGUSR1);
+}
+
+static jmp_buf jmpbuf;
+
+static void sigsegv_for_fallthrough(int sig, siginfo_t *info, void *ctx_void)
+{
+	ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+	if (rip != ctx->uc_mcontext.gregs[REG_RIP]) {
+		printf("[FAIL]\tExpected SIGSEGV at 0x%lx but got RIP=0x%lx\n",
+		       rip, (unsigned long)ctx->uc_mcontext.gregs[REG_RIP]);
+		fflush(stdout);
+		_exit(1);
+	}
+
+	siglongjmp(jmpbuf, 1);
+}
+
+static void test_syscall_fallthrough_to(unsigned long ip)
+{
+	void *new_address = (void *)(ip - 4096);
+	void *ret;
+
+	printf("[RUN]\tTrying a SYSCALL that falls through to 0x%lx\n", ip);
+
+	ret = mremap((void *)current_test_page_addr, 4096, 4096,
+		     MREMAP_MAYMOVE | MREMAP_FIXED, new_address);
+	if (ret == MAP_FAILED) {
+		if (ip <= (1UL << 47) - PAGE_SIZE) {
+			err(1, "mremap to %p", new_address);
+		} else {
+			printf("[OK]\tmremap to %p failed\n", new_address);
+			return;
+		}
+	}
+
+	if (ret != new_address)
+		errx(1, "mremap malfunctioned: asked for %p but got %p\n",
+		     new_address, ret);
+
+	current_test_page_addr = new_address;
+	rip = ip;
+
+	if (sigsetjmp(jmpbuf, 1) == 0) {
+		asm volatile ("call *%[syscall_insn]" :: "a" (SYS_getpid),
+			      [syscall_insn] "rm" (ip - 2));
+		errx(1, "[FAIL]\tSyscall trampoline returned");
+	}
+
+	printf("[OK]\tWe survived\n");
+}
+
+int main()
+{
+	/*
+	 * When the kernel returns from a slow-path syscall, it will
+	 * detect whether SYSRET is appropriate.  If it incorrectly
+	 * thinks that SYSRET is appropriate when RIP is noncanonical,
+	 * it'll crash on Intel CPUs.
+	 */
+	sethandler(SIGUSR1, sigusr1, 0);
+	for (int i = 47; i < 64; i++)
+		test_sigreturn_to(1UL<<i);
+
+	clearhandler(SIGUSR1);
+
+	sethandler(SIGSEGV, sigsegv_for_fallthrough, 0);
+
+	/* One extra test to check that we didn't screw up the mremap logic. */
+	test_syscall_fallthrough_to((1UL << 47) - 2*PAGE_SIZE);
+
+	/* These are the interesting cases. */
+	for (int i = 47; i < 64; i++) {
+		test_syscall_fallthrough_to((1UL<<i) - PAGE_SIZE);
+		test_syscall_fallthrough_to(1UL<<i);
+	}
+
+	return 0;
+}
-- 
cgit v1.2.3


From c1878f7a89efbbe1ac0082d09b2928782a6ceba1 Mon Sep 17 00:00:00 2001
From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Thu, 5 Jan 2017 11:06:22 -0800
Subject: tools: psock_tpacket: block Rx until socket filter has been added and
 socket has been bound to loopback.

Packets from any/all interfaces may be queued up on the PF_PACKET socket
before it is bound to the loopback interface by psock_tpacket, and
when these are passed up by the kernel, they could interfere
with the Rx tests.

Avoid interference from spurious packet by blocking Rx until the
socket filter has been set up, and the packet has been bound to the
desired (lo) interface. The effective sequence is
	socket(PF_PACKET, SOCK_RAW, 0);
	set up ring
	Invoke SO_ATTACH_FILTER
	bind to sll_protocol set to ETH_P_ALL, sll_ifindex for lo
After this sequence, the only packets that will be passed up are
those received on loopback that pass the attached filter.

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/psock_tpacket.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c
index 4a1bc648b35e..7f6cd9fdacf3 100644
--- a/tools/testing/selftests/net/psock_tpacket.c
+++ b/tools/testing/selftests/net/psock_tpacket.c
@@ -110,7 +110,7 @@ static unsigned int total_packets, total_bytes;
 
 static int pfsocket(int ver)
 {
-	int ret, sock = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
+	int ret, sock = socket(PF_PACKET, SOCK_RAW, 0);
 	if (sock == -1) {
 		perror("socket");
 		exit(1);
@@ -239,7 +239,6 @@ static void walk_v1_v2_rx(int sock, struct ring *ring)
 	bug_on(ring->type != PACKET_RX_RING);
 
 	pair_udp_open(udp_sock, PORT_BASE);
-	pair_udp_setfilter(sock);
 
 	memset(&pfd, 0, sizeof(pfd));
 	pfd.fd = sock;
@@ -601,7 +600,6 @@ static void walk_v3_rx(int sock, struct ring *ring)
 	bug_on(ring->type != PACKET_RX_RING);
 
 	pair_udp_open(udp_sock, PORT_BASE);
-	pair_udp_setfilter(sock);
 
 	memset(&pfd, 0, sizeof(pfd));
 	pfd.fd = sock;
@@ -741,6 +739,8 @@ static void bind_ring(int sock, struct ring *ring)
 {
 	int ret;
 
+	pair_udp_setfilter(sock);
+
 	ring->ll.sll_family = PF_PACKET;
 	ring->ll.sll_protocol = htons(ETH_P_ALL);
 	ring->ll.sll_ifindex = if_nametoindex("lo");
-- 
cgit v1.2.3


From 88baa78d1f318e57c7cccbfe55d485befd1ce696 Mon Sep 17 00:00:00 2001
From: "bamvor.zhangjian@huawei.com" <bamvor.zhangjian@huawei.com>
Date: Tue, 29 Nov 2016 19:55:47 +0800
Subject: selftests: remove duplicated all and clean target

Currently, kselftest use TEST_PROGS, TEST_PROGS_EXTENDED, TEST_FILES to
indicate the test program, extended test program and test files. It is
easy to understand the purpose of these files. But mix of compiled and
uncompiled files lead to duplicated "all" and "clean" targets.

In order to remove the duplicated targets, introduce TEST_GEN_PROGS,
TEST_GEN_PROGS_EXTENDED, TEST_GEN_FILES to indicate the compiled
objects.

Also, the later patch will make use of TEST_GEN_XXX to redirect these
files to output directory indicated by KBUILD_OUTPUT or O.

And add this changes to "Contributing new tests(details)" of
Documentation/kselftest.txt.

Signed-off-by: Bamvor Jian Zhang <bamvor.zhangjian@linaro.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 Documentation/kselftest.txt                        | 12 +++++++++++
 tools/testing/selftests/bpf/Makefile               | 10 ++-------
 tools/testing/selftests/breakpoints/Makefile       | 10 +++------
 tools/testing/selftests/capabilities/Makefile      | 11 ++--------
 tools/testing/selftests/efivarfs/Makefile          |  8 +-------
 tools/testing/selftests/exec/Makefile              | 10 ++++-----
 tools/testing/selftests/futex/functional/Makefile  | 12 +++--------
 tools/testing/selftests/ipc/Makefile               |  7 +------
 tools/testing/selftests/kcmp/Makefile              |  6 ++----
 tools/testing/selftests/lib.mk                     | 18 ++++++++++++----
 tools/testing/selftests/membarrier/Makefile        |  6 +-----
 tools/testing/selftests/memfd/Makefile             | 14 +++----------
 tools/testing/selftests/mount/Makefile             |  6 ++----
 tools/testing/selftests/mqueue/Makefile            |  6 +-----
 tools/testing/selftests/net/Makefile               | 13 ++++--------
 tools/testing/selftests/nsfs/Makefile              |  9 +-------
 tools/testing/selftests/powerpc/alignment/Makefile |  9 ++------
 .../testing/selftests/powerpc/benchmarks/Makefile  | 11 +++-------
 .../selftests/powerpc/context_switch/Makefile      |  9 ++------
 tools/testing/selftests/powerpc/copyloops/Makefile | 11 +++-------
 tools/testing/selftests/powerpc/dscr/Makefile      | 13 ++++--------
 tools/testing/selftests/powerpc/math/Makefile      | 13 ++++--------
 tools/testing/selftests/powerpc/mm/Makefile        | 12 ++++-------
 tools/testing/selftests/powerpc/pmu/Makefile       | 12 +++++------
 tools/testing/selftests/powerpc/pmu/ebb/Makefile   | 11 +++-------
 .../testing/selftests/powerpc/primitives/Makefile  |  9 ++------
 .../testing/selftests/powerpc/stringloops/Makefile |  9 ++------
 .../selftests/powerpc/switch_endian/Makefile       |  8 +++-----
 tools/testing/selftests/powerpc/syscalls/Makefile  |  9 ++------
 tools/testing/selftests/powerpc/tm/Makefile        | 11 +++-------
 tools/testing/selftests/powerpc/vphn/Makefile      | 10 +++------
 tools/testing/selftests/ptrace/Makefile            |  8 +-------
 tools/testing/selftests/seccomp/Makefile           |  6 +-----
 tools/testing/selftests/sigaltstack/Makefile       |  5 +----
 tools/testing/selftests/size/Makefile              |  6 +-----
 tools/testing/selftests/timers/Makefile            |  9 ++------
 tools/testing/selftests/vm/Makefile                | 24 +++++++++-------------
 37 files changed, 118 insertions(+), 255 deletions(-)

(limited to 'tools/testing')

diff --git a/Documentation/kselftest.txt b/Documentation/kselftest.txt
index e5c7254e73d7..d431dc82c228 100644
--- a/Documentation/kselftest.txt
+++ b/Documentation/kselftest.txt
@@ -95,3 +95,15 @@ In general, the rules for selftests are
 
  * Don't cause the top-level "make run_tests" to fail if your feature is
    unconfigured.
+
+Contributing new tests(details)
+===============================
+
+ * Use TEST_GEN_XXX if such binaries or files are generated during
+   compiling.
+   TEST_PROGS, TEST_GEN_PROGS mean it is the excutable tested by
+   default.
+   TEST_PROGS_EXTENDED, TEST_GEN_PROGS_EXTENDED mean it is the
+   executable which is not tested by default.
+   TEST_FILES, TEST_GEN_FILES mean it is the file which is used by
+   test.
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 7a5f24543a5f..058351b0694f 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -1,13 +1,7 @@
 CFLAGS += -Wall -O2 -I../../../../usr/include
 
-test_objs = test_verifier test_maps test_lru_map
+TEST_GEN_PROGS = test_verifier test_maps test_lru_map
 
-TEST_PROGS := test_verifier test_maps test_lru_map test_kmod.sh
-TEST_FILES := $(test_objs)
-
-all: $(test_objs)
+TEST_PROGS := test_kmod.sh
 
 include ../lib.mk
-
-clean:
-	$(RM) $(test_objs)
diff --git a/tools/testing/selftests/breakpoints/Makefile b/tools/testing/selftests/breakpoints/Makefile
index 61b79e8df1f4..72aa103e4141 100644
--- a/tools/testing/selftests/breakpoints/Makefile
+++ b/tools/testing/selftests/breakpoints/Makefile
@@ -3,17 +3,13 @@ uname_M := $(shell uname -m 2>/dev/null || echo not)
 ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
 
 ifeq ($(ARCH),x86)
-TEST_PROGS := breakpoint_test
+TEST_GEN_PROGS := breakpoint_test
 endif
 ifeq ($(ARCH),aarch64)
-TEST_PROGS := breakpoint_test_arm64
+TEST_GEN_PROGS := breakpoint_test_arm64
 endif
 
-TEST_PROGS += step_after_suspend_test
-
-all: $(TEST_PROGS)
+TEST_GEN_PROGS += step_after_suspend_test
 
 include ../lib.mk
 
-clean:
-	rm -fr breakpoint_test breakpoint_test_arm64 step_after_suspend_test
diff --git a/tools/testing/selftests/capabilities/Makefile b/tools/testing/selftests/capabilities/Makefile
index 008602aed920..29b8adfdac71 100644
--- a/tools/testing/selftests/capabilities/Makefile
+++ b/tools/testing/selftests/capabilities/Makefile
@@ -1,15 +1,8 @@
-TEST_FILES := validate_cap
-TEST_PROGS := test_execve
-
-BINARIES := $(TEST_FILES) $(TEST_PROGS)
+TEST_GEN_FILES := validate_cap
+TEST_GEN_PROGS := test_execve
 
 CFLAGS += -O2 -g -std=gnu99 -Wall
 LDLIBS += -lcap-ng -lrt -ldl
 
-all: $(BINARIES)
-
-clean:
-	$(RM) $(BINARIES)
-
 include ../lib.mk
 
diff --git a/tools/testing/selftests/efivarfs/Makefile b/tools/testing/selftests/efivarfs/Makefile
index 736c3ddfc787..c49dcea69319 100644
--- a/tools/testing/selftests/efivarfs/Makefile
+++ b/tools/testing/selftests/efivarfs/Makefile
@@ -1,13 +1,7 @@
 CFLAGS = -Wall
 
-test_objs = open-unlink create-read
-
-all: $(test_objs)
-
+TEST_GEN_FILES := open-unlink create-read
 TEST_PROGS := efivarfs.sh
-TEST_FILES := $(test_objs)
 
 include ../lib.mk
 
-clean:
-	rm -f $(test_objs)
diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
index d4300602bf37..b3bf091368ca 100644
--- a/tools/testing/selftests/exec/Makefile
+++ b/tools/testing/selftests/exec/Makefile
@@ -1,7 +1,4 @@
 CFLAGS = -Wall
-BINARIES = execveat
-DEPS = execveat.symlink execveat.denatured script subdir
-all: $(BINARIES) $(DEPS)
 
 subdir:
 	mkdir -p $@
@@ -17,11 +14,12 @@ execveat.denatured: execveat
 %: %.c
 	$(CC) $(CFLAGS) -o $@ $^
 
-TEST_PROGS := execveat
+TEST_GEN_PROGS := execveat
+TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir
 # Makefile is a run-time dependency, since it's accessed by the execveat test
-TEST_FILES := $(DEPS) Makefile
+TEST_FILES := Makefile
 
 include ../lib.mk
 
 clean:
-	rm -rf $(BINARIES) $(DEPS) subdir.moved execveat.moved xxxxx*
+	rm -rf $(TEST_GEN_PROGS) $(TEST_GEN_FILES) subdir.moved execveat.moved xxxxx*
diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
index 9d6b75ef7b5d..ac35782ce1e5 100644
--- a/tools/testing/selftests/futex/functional/Makefile
+++ b/tools/testing/selftests/futex/functional/Makefile
@@ -3,7 +3,7 @@ CFLAGS := $(CFLAGS) -g -O2 -Wall -D_GNU_SOURCE -pthread $(INCLUDES)
 LDFLAGS := $(LDFLAGS) -pthread -lrt
 
 HEADERS := ../include/futextest.h
-TARGETS := \
+TEST_GEN_FILES := \
 	futex_wait_timeout \
 	futex_wait_wouldblock \
 	futex_requeue_pi \
@@ -12,14 +12,8 @@ TARGETS := \
 	futex_wait_uninitialized_heap \
 	futex_wait_private_mapped_file
 
-TEST_PROGS := $(TARGETS) run.sh
-
-.PHONY: all clean
-all: $(TARGETS)
-
-$(TARGETS): $(HEADERS)
+TEST_PROGS := run.sh
 
 include ../../lib.mk
 
-clean:
-	rm -f $(TARGETS)
+$(TEST_GEN_FILES): $(HEADERS)
diff --git a/tools/testing/selftests/ipc/Makefile b/tools/testing/selftests/ipc/Makefile
index 25d2e702c68a..30ef4c7f53ea 100644
--- a/tools/testing/selftests/ipc/Makefile
+++ b/tools/testing/selftests/ipc/Makefile
@@ -11,12 +11,7 @@ endif
 
 CFLAGS += -I../../../../usr/include/
 
-all:
-	$(CC) $(CFLAGS) msgque.c -o msgque_test
-
-TEST_PROGS := msgque_test
+TEST_GEN_PROGS := msgque
 
 include ../lib.mk
 
-clean:
-	rm -fr ./msgque_test
diff --git a/tools/testing/selftests/kcmp/Makefile b/tools/testing/selftests/kcmp/Makefile
index 2ae7450a9a89..9e8b673d41b1 100644
--- a/tools/testing/selftests/kcmp/Makefile
+++ b/tools/testing/selftests/kcmp/Makefile
@@ -1,10 +1,8 @@
 CFLAGS += -I../../../../usr/include/
 
-all: kcmp_test
-
-TEST_PROGS := kcmp_test
+TEST_GEN_PROGS := kcmp_test
 
 include ../lib.mk
 
 clean:
-	$(RM) kcmp_test kcmp-test-file
+	$(RM) $(TEST_GEN_PROGS) kcmp-test-file
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 50a93f5f13d6..9fddffd066ca 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -3,7 +3,7 @@
 CC := $(CROSS_COMPILE)gcc
 
 define RUN_TESTS
-	@for TEST in $(TEST_PROGS); do \
+	@for TEST in $(TEST_GEN_PROGS) $(TEST_PROGS); do \
 		(./$$TEST && echo "selftests: $$TEST [PASS]") || echo "selftests: $$TEST [FAIL]"; \
 	done;
 endef
@@ -14,8 +14,13 @@ run_tests: all
 define INSTALL_RULE
 	@if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then					\
 		mkdir -p ${INSTALL_PATH};										\
-		echo "rsync -a $(TEST_DIRS) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/";	\
-		rsync -a $(TEST_DIRS) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/;		\
+		echo "rsync -a $(TEST_DIRS) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/";       \
+		rsync -a $(TEST_DIRS) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/;              \
+	fi
+	@if [ "X$(TEST_GEN_PROGS)$(TEST_GEN_PROGS_EXTENDED)$(TEST_GEN_FILES)" != "X" ]; then					\
+		mkdir -p ${INSTALL_PATH};										\
+		echo "rsync -a $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/";	\
+		rsync -a $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/;		\
 	fi
 endef
 
@@ -27,7 +32,7 @@ else
 endif
 
 define EMIT_TESTS
-	@for TEST in $(TEST_PROGS); do \
+	@for TEST in $(TEST_GEN_PROGS) $(TEST_PROGS); do \
 		echo "(./$$TEST && echo \"selftests: $$TEST [PASS]\") || echo \"selftests: $$TEST [FAIL]\""; \
 	done;
 endef
@@ -35,4 +40,9 @@ endef
 emit_tests:
 	$(EMIT_TESTS)
 
+all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
+
+clean:
+	$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
+
 .PHONY: run_tests all clean install emit_tests
diff --git a/tools/testing/selftests/membarrier/Makefile b/tools/testing/selftests/membarrier/Makefile
index a1a97085847d..02845532b059 100644
--- a/tools/testing/selftests/membarrier/Makefile
+++ b/tools/testing/selftests/membarrier/Makefile
@@ -1,10 +1,6 @@
 CFLAGS += -g -I../../../../usr/include/
 
-TEST_PROGS := membarrier_test
-
-all: $(TEST_PROGS)
+TEST_GEN_PROGS := membarrier_test
 
 include ../lib.mk
 
-clean:
-	$(RM) $(TEST_PROGS)
diff --git a/tools/testing/selftests/memfd/Makefile b/tools/testing/selftests/memfd/Makefile
index fd396ac811b6..2c87f2376e59 100644
--- a/tools/testing/selftests/memfd/Makefile
+++ b/tools/testing/selftests/memfd/Makefile
@@ -4,19 +4,11 @@ CFLAGS += -I../../../../include/uapi/
 CFLAGS += -I../../../../include/
 CFLAGS += -I../../../../usr/include/
 
-TEST_PROGS := memfd_test
-
-all: $(TEST_PROGS)
-
-include ../lib.mk
-
-build_fuse: fuse_mnt fuse_test
+TEST_PROGS := run_fuse_test.sh
+TEST_GEN_FILES := memfd_test fuse_mnt fuse_test
 
 fuse_mnt.o: CFLAGS += $(shell pkg-config fuse --cflags)
 fuse_mnt: LDFLAGS += $(shell pkg-config fuse --libs)
 
-run_fuse: build_fuse
-	@./run_fuse_test.sh || echo "fuse_test: [FAIL]"
+include ../lib.mk
 
-clean:
-	$(RM) memfd_test fuse_test
diff --git a/tools/testing/selftests/mount/Makefile b/tools/testing/selftests/mount/Makefile
index 5e35c9c50b72..e8fb15e80172 100644
--- a/tools/testing/selftests/mount/Makefile
+++ b/tools/testing/selftests/mount/Makefile
@@ -1,14 +1,14 @@
 # Makefile for mount selftests.
 CFLAGS = -Wall \
          -O2
-all: unprivileged-remount-test
 
 unprivileged-remount-test: unprivileged-remount-test.c
 	$(CC) $(CFLAGS) unprivileged-remount-test.c -o unprivileged-remount-test
 
+TEST_GEN_PROGS := unprivileged-remount-test
+
 include ../lib.mk
 
-TEST_PROGS := unprivileged-remount-test
 override RUN_TESTS := if [ -f /proc/self/uid_map ] ; \
 		      then	\
 				./unprivileged-remount-test ; \
@@ -17,5 +17,3 @@ override RUN_TESTS := if [ -f /proc/self/uid_map ] ; \
 		      fi
 override EMIT_TESTS := echo "$(RUN_TESTS)"
 
-clean:
-	rm -f unprivileged-remount-test
diff --git a/tools/testing/selftests/mqueue/Makefile b/tools/testing/selftests/mqueue/Makefile
index eebac29acbd9..79a664aeb8d7 100644
--- a/tools/testing/selftests/mqueue/Makefile
+++ b/tools/testing/selftests/mqueue/Makefile
@@ -1,8 +1,6 @@
 CFLAGS += -O2
 LDLIBS = -lrt -lpthread -lpopt
-TEST_PROGS := mq_open_tests mq_perf_tests
-
-all: $(TEST_PROGS)
+TEST_GEN_PROGS := mq_open_tests mq_perf_tests
 
 include ../lib.mk
 
@@ -16,5 +14,3 @@ override define EMIT_TESTS
 	echo "./mq_perf_tests || echo \"selftests: mq_perf_tests [FAIL]\""
 endef
 
-clean:
-	rm -f mq_open_tests mq_perf_tests
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index e24e4c82542e..fe5b36dd7140 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -3,20 +3,15 @@
 CFLAGS =  -Wall -Wl,--no-as-needed -O2 -g
 CFLAGS += -I../../../../usr/include/
 
-NET_PROGS =  socket
-NET_PROGS += psock_fanout psock_tpacket
-NET_PROGS += reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
-NET_PROGS += reuseport_dualstack
-
-all: $(NET_PROGS)
 reuseport_bpf_numa: LDFLAGS += -lnuma
 %: %.c
 	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^
 
 TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh
-TEST_FILES := $(NET_PROGS)
+TEST_GEN_FILES =  socket
+TEST_GEN_FILES += psock_fanout psock_tpacket
+TEST_GEN_FILES += reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
+TEST_GEN_FILES += reuseport_dualstack
 
 include ../lib.mk
 
-clean:
-	$(RM) $(NET_PROGS)
diff --git a/tools/testing/selftests/nsfs/Makefile b/tools/testing/selftests/nsfs/Makefile
index 2306054a901a..9ff7c7f80625 100644
--- a/tools/testing/selftests/nsfs/Makefile
+++ b/tools/testing/selftests/nsfs/Makefile
@@ -1,12 +1,5 @@
-TEST_PROGS := owner pidns
+TEST_GEN_PROGS := owner pidns
 
 CFLAGS := -Wall -Werror
 
-all: owner pidns
-owner: owner.c
-pidns: pidns.c
-
-clean:
-	$(RM) owner pidns
-
 include ../lib.mk
diff --git a/tools/testing/selftests/powerpc/alignment/Makefile b/tools/testing/selftests/powerpc/alignment/Makefile
index ad6a4e49da91..16b22004e75f 100644
--- a/tools/testing/selftests/powerpc/alignment/Makefile
+++ b/tools/testing/selftests/powerpc/alignment/Makefile
@@ -1,10 +1,5 @@
-TEST_PROGS := copy_unaligned copy_first_unaligned paste_unaligned paste_last_unaligned
-
-all: $(TEST_PROGS)
-
-$(TEST_PROGS): ../harness.c ../utils.c copy_paste_unaligned_common.c
+TEST_GEN_PROGS := copy_unaligned copy_first_unaligned paste_unaligned paste_last_unaligned
 
 include ../../lib.mk
 
-clean:
-	rm -f $(TEST_PROGS)
+$(TEST_GEN_PROGS): ../harness.c ../utils.c copy_paste_unaligned_common.c
diff --git a/tools/testing/selftests/powerpc/benchmarks/Makefile b/tools/testing/selftests/powerpc/benchmarks/Makefile
index 545077f98f72..08a55bd17800 100644
--- a/tools/testing/selftests/powerpc/benchmarks/Makefile
+++ b/tools/testing/selftests/powerpc/benchmarks/Makefile
@@ -1,16 +1,11 @@
-TEST_PROGS := gettimeofday context_switch mmap_bench futex_bench null_syscall
+TEST_GEN_PROGS := gettimeofday context_switch mmap_bench futex_bench null_syscall
 
 CFLAGS += -O2
 
-all: $(TEST_PROGS)
+$(TEST_GEN_PROGS): ../harness.c
 
-$(TEST_PROGS): ../harness.c
+include ../../lib.mk
 
 context_switch: ../utils.c
 context_switch: CFLAGS += -maltivec -mvsx -mabi=altivec
 context_switch: LDLIBS += -lpthread
-
-include ../../lib.mk
-
-clean:
-	rm -f $(TEST_PROGS) *.o
diff --git a/tools/testing/selftests/powerpc/context_switch/Makefile b/tools/testing/selftests/powerpc/context_switch/Makefile
index e164d1466466..e9351bb4285d 100644
--- a/tools/testing/selftests/powerpc/context_switch/Makefile
+++ b/tools/testing/selftests/powerpc/context_switch/Makefile
@@ -1,10 +1,5 @@
-TEST_PROGS := cp_abort
-
-all: $(TEST_PROGS)
-
-$(TEST_PROGS): ../harness.c ../utils.c
+TEST_GEN_PROGS := cp_abort
 
 include ../../lib.mk
 
-clean:
-	rm -f $(TEST_PROGS)
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile
index 384843ea0d40..9ad1558e2ea9 100644
--- a/tools/testing/selftests/powerpc/copyloops/Makefile
+++ b/tools/testing/selftests/powerpc/copyloops/Makefile
@@ -7,19 +7,14 @@ CFLAGS += -maltivec
 # Use our CFLAGS for the implicit .S rule
 ASFLAGS = $(CFLAGS)
 
-TEST_PROGS := copyuser_64 copyuser_power7 memcpy_64 memcpy_power7
+TEST_GEN_PROGS := copyuser_64 copyuser_power7 memcpy_64 memcpy_power7
 EXTRA_SOURCES := validate.c ../harness.c
 
-all: $(TEST_PROGS)
+include ../../lib.mk
 
 copyuser_64:     CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_base
 copyuser_power7: CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_power7
 memcpy_64:       CPPFLAGS += -D COPY_LOOP=test_memcpy
 memcpy_power7:   CPPFLAGS += -D COPY_LOOP=test_memcpy_power7
 
-$(TEST_PROGS): $(EXTRA_SOURCES)
-
-include ../../lib.mk
-
-clean:
-	rm -f $(TEST_PROGS) *.o
+$(TEST_GEN_PROGS): $(EXTRA_SOURCES)
diff --git a/tools/testing/selftests/powerpc/dscr/Makefile b/tools/testing/selftests/powerpc/dscr/Makefile
index 49327ee84e3a..4262de42017b 100644
--- a/tools/testing/selftests/powerpc/dscr/Makefile
+++ b/tools/testing/selftests/powerpc/dscr/Makefile
@@ -1,14 +1,9 @@
-TEST_PROGS := dscr_default_test dscr_explicit_test dscr_user_test	\
+TEST_GEN_PROGS := dscr_default_test dscr_explicit_test dscr_user_test	\
 	      dscr_inherit_test dscr_inherit_exec_test dscr_sysfs_test	\
 	      dscr_sysfs_thread_test
 
-dscr_default_test: LDLIBS += -lpthread
-
-all: $(TEST_PROGS)
-
-$(TEST_PROGS): ../harness.c
-
 include ../../lib.mk
 
-clean:
-	rm -f $(TEST_PROGS) *.o
+dscr_default_test: LDLIBS += -lpthread
+
+$(TEST_GEN_PROGS): ../harness.c
diff --git a/tools/testing/selftests/powerpc/math/Makefile b/tools/testing/selftests/powerpc/math/Makefile
index a505b66d408a..814c38591b13 100644
--- a/tools/testing/selftests/powerpc/math/Makefile
+++ b/tools/testing/selftests/powerpc/math/Makefile
@@ -1,9 +1,9 @@
-TEST_PROGS := fpu_syscall fpu_preempt fpu_signal vmx_syscall vmx_preempt vmx_signal vsx_preempt
+TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal vmx_syscall vmx_preempt vmx_signal vsx_preempt
 
-all: $(TEST_PROGS)
+include ../../lib.mk
 
-$(TEST_PROGS): ../harness.c
-$(TEST_PROGS): CFLAGS += -O2 -g -pthread -m64 -maltivec
+$(TEST_GEN_PROGS): ../harness.c
+$(TEST_GEN_PROGS): CFLAGS += -O2 -g -pthread -m64 -maltivec
 
 fpu_syscall: fpu_asm.S
 fpu_preempt: fpu_asm.S
@@ -15,8 +15,3 @@ vmx_signal: vmx_asm.S
 
 vsx_preempt: CFLAGS += -mvsx
 vsx_preempt: vsx_asm.S
-
-include ../../lib.mk
-
-clean:
-	rm -f $(TEST_PROGS) *.o
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
index 3bdb96eae558..d5633783c82e 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -1,19 +1,15 @@
 noarg:
 	$(MAKE) -C ../
 
-TEST_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao
-TEST_FILES := tempfile
+TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao
+TEST_GEN_FILES := tempfile
 
-all: $(TEST_PROGS) $(TEST_FILES)
+include ../../lib.mk
 
-$(TEST_PROGS): ../harness.c
+$(TEST_GEN_PROGS): ../harness.c
 
 prot_sao: ../utils.c
 
-include ../../lib.mk
-
 tempfile:
 	dd if=/dev/zero of=tempfile bs=64k count=1
 
-clean:
-	rm -f $(TEST_PROGS) tempfile
diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile
index ac41a7177f2e..ab0f9026c21c 100644
--- a/tools/testing/selftests/powerpc/pmu/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/Makefile
@@ -1,12 +1,14 @@
 noarg:
 	$(MAKE) -C ../
 
-TEST_PROGS := count_instructions l3_bank_test per_event_excludes
+TEST_GEN_PROGS := count_instructions l3_bank_test per_event_excludes
 EXTRA_SOURCES := ../harness.c event.c lib.c ../utils.c
 
-all: $(TEST_PROGS) ebb
+include ../../lib.mk
+
+all: $(TEST_GEN_PROGS) ebb
 
-$(TEST_PROGS): $(EXTRA_SOURCES)
+$(TEST_GEN_PROGS): $(EXTRA_SOURCES)
 
 # loop.S can only be built 64-bit
 count_instructions: loop.S count_instructions.c $(EXTRA_SOURCES)
@@ -14,8 +16,6 @@ count_instructions: loop.S count_instructions.c $(EXTRA_SOURCES)
 
 per_event_excludes: ../utils.c
 
-include ../../lib.mk
-
 DEFAULT_RUN_TESTS := $(RUN_TESTS)
 override define RUN_TESTS
 	$(DEFAULT_RUN_TESTS)
@@ -35,7 +35,7 @@ override define INSTALL_RULE
 endef
 
 clean:
-	rm -f $(TEST_PROGS) loop.o
+	$(RM) $(TEST_PROGS) loop.o
 	$(MAKE) -C ebb clean
 
 ebb:
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
index 8d2279c4bb4b..0bc2bd6db511 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
@@ -4,7 +4,7 @@ noarg:
 # The EBB handler is 64-bit code and everything links against it
 CFLAGS += -m64
 
-TEST_PROGS := reg_access_test event_attributes_test cycles_test	\
+TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test	\
 	 cycles_with_freeze_test pmc56_overflow_test		\
 	 ebb_vs_cpu_event_test cpu_event_vs_ebb_test		\
 	 cpu_event_pinned_vs_ebb_test task_event_vs_ebb_test	\
@@ -16,16 +16,11 @@ TEST_PROGS := reg_access_test event_attributes_test cycles_test	\
 	 lost_exception_test no_handler_test			\
 	 cycles_with_mmcr2_test
 
-all: $(TEST_PROGS)
+include ../../../lib.mk
 
-$(TEST_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c \
+$(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c \
 	       ebb.c ebb_handler.S trace.c busy_loop.S
 
 instruction_count_test: ../loop.S
 
 lost_exception_test: ../lib.c
-
-include ../../../lib.mk
-
-clean:
-	rm -f $(TEST_PROGS)
diff --git a/tools/testing/selftests/powerpc/primitives/Makefile b/tools/testing/selftests/powerpc/primitives/Makefile
index b68c6221d3d1..175366db7be8 100644
--- a/tools/testing/selftests/powerpc/primitives/Makefile
+++ b/tools/testing/selftests/powerpc/primitives/Makefile
@@ -1,12 +1,7 @@
 CFLAGS += -I$(CURDIR)
 
-TEST_PROGS := load_unaligned_zeropad
-
-all: $(TEST_PROGS)
-
-$(TEST_PROGS): ../harness.c
+TEST_GEN_PROGS := load_unaligned_zeropad
 
 include ../../lib.mk
 
-clean:
-	rm -f $(TEST_PROGS) *.o
+$(TEST_GEN_PROGS): ../harness.c
diff --git a/tools/testing/selftests/powerpc/stringloops/Makefile b/tools/testing/selftests/powerpc/stringloops/Makefile
index 2a728f4d2873..557b9379f3bb 100644
--- a/tools/testing/selftests/powerpc/stringloops/Makefile
+++ b/tools/testing/selftests/powerpc/stringloops/Makefile
@@ -2,14 +2,9 @@
 CFLAGS += -m64
 CFLAGS += -I$(CURDIR)
 
-TEST_PROGS := memcmp
+TEST_GEN_PROGS := memcmp
 EXTRA_SOURCES := memcmp_64.S ../harness.c
 
-all: $(TEST_PROGS)
-
-$(TEST_PROGS): $(EXTRA_SOURCES)
-
 include ../../lib.mk
 
-clean:
-	rm -f $(TEST_PROGS) *.o
+$(TEST_GEN_PROGS): $(EXTRA_SOURCES)
diff --git a/tools/testing/selftests/powerpc/switch_endian/Makefile b/tools/testing/selftests/powerpc/switch_endian/Makefile
index e21d10674e54..bd0122306da6 100644
--- a/tools/testing/selftests/powerpc/switch_endian/Makefile
+++ b/tools/testing/selftests/powerpc/switch_endian/Makefile
@@ -1,8 +1,8 @@
-TEST_PROGS := switch_endian_test
+TEST_GEN_PROGS := switch_endian_test
 
 ASFLAGS += -O2 -Wall -g -nostdlib -m64
 
-all: $(TEST_PROGS)
+include ../../lib.mk
 
 switch_endian_test: check-reversed.S
 
@@ -12,7 +12,5 @@ check-reversed.o: check.o
 check-reversed.S: check-reversed.o
 	hexdump -v -e '/1 ".byte 0x%02X\n"' $< > $@
 
-include ../../lib.mk
-
 clean:
-	rm -f $(TEST_PROGS) *.o check-reversed.S
+	$(RM) $(TEST_GEN_PROGS) *.o check-reversed.S
diff --git a/tools/testing/selftests/powerpc/syscalls/Makefile b/tools/testing/selftests/powerpc/syscalls/Makefile
index b35c7945bec5..da22ca7c38c1 100644
--- a/tools/testing/selftests/powerpc/syscalls/Makefile
+++ b/tools/testing/selftests/powerpc/syscalls/Makefile
@@ -1,12 +1,7 @@
-TEST_PROGS := ipc_unmuxed
+TEST_GEN_PROGS := ipc_unmuxed
 
 CFLAGS += -I../../../../../usr/include
 
-all: $(TEST_PROGS)
-
-$(TEST_PROGS): ../harness.c
-
 include ../../lib.mk
 
-clean:
-	rm -f $(TEST_PROGS) *.o
+$(TEST_GEN_PROGS): ../harness.c
diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile
index c6c53c82fdd6..117c6247928a 100644
--- a/tools/testing/selftests/powerpc/tm/Makefile
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -1,12 +1,12 @@
 SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu \
 	tm-signal-context-chk-vmx tm-signal-context-chk-vsx
 
-TEST_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \
+TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \
 	tm-vmxcopy tm-fork tm-tar tm-tmspr $(SIGNAL_CONTEXT_CHK_TESTS)
 
-all: $(TEST_PROGS)
+include ../../lib.mk
 
-$(TEST_PROGS): ../harness.c ../utils.c
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
 
 CFLAGS += -mhtm
 
@@ -16,8 +16,3 @@ tm-tmspr: CFLAGS += -pthread
 
 $(SIGNAL_CONTEXT_CHK_TESTS): tm-signal.S
 $(SIGNAL_CONTEXT_CHK_TESTS): CFLAGS += -mhtm -m64 -mvsx
-
-include ../../lib.mk
-
-clean:
-	rm -f $(TEST_PROGS) *.o
diff --git a/tools/testing/selftests/powerpc/vphn/Makefile b/tools/testing/selftests/powerpc/vphn/Makefile
index a485f2e286ae..f8ced26748f8 100644
--- a/tools/testing/selftests/powerpc/vphn/Makefile
+++ b/tools/testing/selftests/powerpc/vphn/Makefile
@@ -1,12 +1,8 @@
-TEST_PROGS := test-vphn
+TEST_GEN_PROGS := test-vphn
 
 CFLAGS += -m64
 
-all: $(TEST_PROGS)
-
-$(TEST_PROGS): ../harness.c
-
 include ../../lib.mk
 
-clean:
-	rm -f $(TEST_PROGS)
+$(TEST_GEN_PROGS): ../harness.c
+
diff --git a/tools/testing/selftests/ptrace/Makefile b/tools/testing/selftests/ptrace/Makefile
index 453927fea90c..8a2bc5562179 100644
--- a/tools/testing/selftests/ptrace/Makefile
+++ b/tools/testing/selftests/ptrace/Makefile
@@ -1,11 +1,5 @@
 CFLAGS += -iquote../../../../include/uapi -Wall
-peeksiginfo: peeksiginfo.c
 
-all: peeksiginfo
-
-clean:
-	rm -f peeksiginfo
-
-TEST_PROGS := peeksiginfo
+TEST_GEN_PROGS := peeksiginfo
 
 include ../lib.mk
diff --git a/tools/testing/selftests/seccomp/Makefile b/tools/testing/selftests/seccomp/Makefile
index 8401e87e34e1..5fa6fd2246b1 100644
--- a/tools/testing/selftests/seccomp/Makefile
+++ b/tools/testing/selftests/seccomp/Makefile
@@ -1,10 +1,6 @@
-TEST_PROGS := seccomp_bpf
+TEST_GEN_PROGS := seccomp_bpf
 CFLAGS += -Wl,-no-as-needed -Wall
 LDFLAGS += -lpthread
 
-all: $(TEST_PROGS)
-
 include ../lib.mk
 
-clean:
-	$(RM) $(TEST_PROGS)
diff --git a/tools/testing/selftests/sigaltstack/Makefile b/tools/testing/selftests/sigaltstack/Makefile
index 56af56eda6fa..f68fbf80d8be 100644
--- a/tools/testing/selftests/sigaltstack/Makefile
+++ b/tools/testing/selftests/sigaltstack/Makefile
@@ -1,8 +1,5 @@
 CFLAGS = -Wall
-BINARIES = sas
-all: $(BINARIES)
+TEST_GEN_PROGS = sas
 
 include ../lib.mk
 
-clean:
-	rm -rf $(BINARIES)
diff --git a/tools/testing/selftests/size/Makefile b/tools/testing/selftests/size/Makefile
index bbd0b5398b61..c67f3577e888 100644
--- a/tools/testing/selftests/size/Makefile
+++ b/tools/testing/selftests/size/Makefile
@@ -1,11 +1,7 @@
-all: get_size
 
 get_size: get_size.c
 	$(CC) -static -ffreestanding -nostartfiles -s $< -o $@
 
-TEST_PROGS := get_size
+TEST_GEN_PROGS := get_size
 
 include ../lib.mk
-
-clean:
-	$(RM) get_size
diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile
index 1d5556869137..4a3bffed9901 100644
--- a/tools/testing/selftests/timers/Makefile
+++ b/tools/testing/selftests/timers/Makefile
@@ -5,16 +5,13 @@ LDFLAGS += -lrt -lpthread
 
 # these are all "safe" tests that don't modify
 # system time or require escalated privledges
-TEST_PROGS = posix_timers nanosleep nsleep-lat set-timer-lat mqueue-lat \
+TEST_GEN_PROGS = posix_timers nanosleep nsleep-lat set-timer-lat mqueue-lat \
 	     inconsistency-check raw_skew threadtest rtctest
 
-TEST_PROGS_EXTENDED = alarmtimer-suspend valid-adjtimex adjtick change_skew \
+TEST_GEN_PROGS_EXTENDED = alarmtimer-suspend valid-adjtimex adjtick change_skew \
 		      skew_consistency clocksource-switch leap-a-day \
 		      leapcrash set-tai set-2038 set-tz
 
-bins = $(TEST_PROGS) $(TEST_PROGS_EXTENDED)
-
-all: ${bins}
 
 include ../lib.mk
 
@@ -34,5 +31,3 @@ run_destructive_tests: run_tests
 	./set-tai
 	./set-2038
 
-clean:
-	rm -f ${bins}
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index bbab7f4664ac..323383ab4581 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -1,18 +1,17 @@
 # Makefile for vm selftests
 
 CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS)
-BINARIES = compaction_test
-BINARIES += hugepage-mmap
-BINARIES += hugepage-shm
-BINARIES += map_hugetlb
-BINARIES += mlock2-tests
-BINARIES += on-fault-limit
-BINARIES += thuge-gen
-BINARIES += transhuge-stress
-BINARIES += userfaultfd
-BINARIES += mlock-random-test
+TEST_GEN_FILES = compaction_test
+TEST_GEN_FILES += hugepage-mmap
+TEST_GEN_FILES += hugepage-shm
+TEST_GEN_FILES += map_hugetlb
+TEST_GEN_FILES += mlock2-tests
+TEST_GEN_FILES += on-fault-limit
+TEST_GEN_FILES += thuge-gen
+TEST_GEN_FILES += transhuge-stress
+TEST_GEN_FILES += userfaultfd
+TEST_GEN_FILES += mlock-random-test
 
-all: $(BINARIES)
 %: %.c
 	$(CC) $(CFLAGS) -o $@ $^ -lrt
 userfaultfd: userfaultfd.c ../../../../usr/include/linux/kernel.h
@@ -25,9 +24,6 @@ mlock-random-test: mlock-random-test.c
 	make -C ../../../.. headers_install
 
 TEST_PROGS := run_vmtests
-TEST_FILES := $(BINARIES)
 
 include ../lib.mk
 
-clean:
-	$(RM) $(BINARIES)
-- 
cgit v1.2.3


From 5fe59799352b2e2e2dfcf84585c6d780d6effb0e Mon Sep 17 00:00:00 2001
From: "bamvor.zhangjian@huawei.com" <bamvor.zhangjian@huawei.com>
Date: Tue, 29 Nov 2016 19:55:48 +0800
Subject: selftests: remove useless TEST_DIRS

The TEST_DIRS was introduced in Commit e8c1d7cdf137 ("selftests: copy
TEST_DIRS to INSTALL_PATH") for coping a whole directory in ftrace.

After rsync(with -a) is introduced by Commit 900d65ee11aa ("selftests:
change install command to rsync"). Rsync could handle the directory
without the definition of TEST_DIRS.

This patch simply replace TEST_DIRS with TEST_FILES in ftrace and remove
the TEST_DIRS in tools/testing/selftest/lib.mk

Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Bamvor Jian Zhang <bamvor.zhangjian@linaro.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/ftrace/Makefile | 2 +-
 tools/testing/selftests/lib.mk          | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/ftrace/Makefile b/tools/testing/selftests/ftrace/Makefile
index 4e6ed13e7f66..a71530097352 100644
--- a/tools/testing/selftests/ftrace/Makefile
+++ b/tools/testing/selftests/ftrace/Makefile
@@ -1,7 +1,7 @@
 all:
 
 TEST_PROGS := ftracetest
-TEST_DIRS := test.d
+TEST_FILES := test.d
 
 include ../lib.mk
 
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 9fddffd066ca..5b8634cad092 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -14,8 +14,8 @@ run_tests: all
 define INSTALL_RULE
 	@if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then					\
 		mkdir -p ${INSTALL_PATH};										\
-		echo "rsync -a $(TEST_DIRS) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/";       \
-		rsync -a $(TEST_DIRS) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/;              \
+		echo "rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/";	\
+		rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/;		\
 	fi
 	@if [ "X$(TEST_GEN_PROGS)$(TEST_GEN_PROGS_EXTENDED)$(TEST_GEN_FILES)" != "X" ]; then					\
 		mkdir -p ${INSTALL_PATH};										\
-- 
cgit v1.2.3


From 7d758af257bc40a8a599a0aaa4768b30fb463e9c Mon Sep 17 00:00:00 2001
From: "bamvor.zhangjian@huawei.com" <bamvor.zhangjian@huawei.com>
Date: Tue, 29 Nov 2016 19:55:49 +0800
Subject: selftests: add default rules for c source file

There are difference rules for compiling c source file in different
testcases. In order to enable KBUILD_OUTPUT support in later patch,
this patch introduce the default rules in
"tools/testing/selftest/lib.mk" and remove the existing rules in each
testcase.

Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Bamvor Jian Zhang <bamvor.zhangjian@linaro.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/exec/Makefile  |  2 --
 tools/testing/selftests/lib.mk         |  3 +++
 tools/testing/selftests/mount/Makefile |  3 ---
 tools/testing/selftests/net/Makefile   |  2 --
 tools/testing/selftests/size/Makefile  |  4 +---
 tools/testing/selftests/vm/Makefile    | 17 ++++++-----------
 6 files changed, 10 insertions(+), 21 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
index b3bf091368ca..9eb1c3e4d339 100644
--- a/tools/testing/selftests/exec/Makefile
+++ b/tools/testing/selftests/exec/Makefile
@@ -11,8 +11,6 @@ execveat.symlink: execveat
 execveat.denatured: execveat
 	cp $< $@
 	chmod -x $@
-%: %.c
-	$(CC) $(CFLAGS) -o $@ $^
 
 TEST_GEN_PROGS := execveat
 TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 5b8634cad092..68d7c01b9f06 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -45,4 +45,7 @@ all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
 clean:
 	$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
 
+%: %.c
+	$(CC) $(CFLAGS) $(LDFLAGS) $(LDLIBS) -o $@ $^
+
 .PHONY: run_tests all clean install emit_tests
diff --git a/tools/testing/selftests/mount/Makefile b/tools/testing/selftests/mount/Makefile
index e8fb15e80172..9093d7ffe87f 100644
--- a/tools/testing/selftests/mount/Makefile
+++ b/tools/testing/selftests/mount/Makefile
@@ -2,9 +2,6 @@
 CFLAGS = -Wall \
          -O2
 
-unprivileged-remount-test: unprivileged-remount-test.c
-	$(CC) $(CFLAGS) unprivileged-remount-test.c -o unprivileged-remount-test
-
 TEST_GEN_PROGS := unprivileged-remount-test
 
 include ../lib.mk
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index fe5b36dd7140..fbfe5d0d5c2e 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -4,8 +4,6 @@ CFLAGS =  -Wall -Wl,--no-as-needed -O2 -g
 CFLAGS += -I../../../../usr/include/
 
 reuseport_bpf_numa: LDFLAGS += -lnuma
-%: %.c
-	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^
 
 TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh
 TEST_GEN_FILES =  socket
diff --git a/tools/testing/selftests/size/Makefile b/tools/testing/selftests/size/Makefile
index c67f3577e888..4685b3e421fc 100644
--- a/tools/testing/selftests/size/Makefile
+++ b/tools/testing/selftests/size/Makefile
@@ -1,6 +1,4 @@
-
-get_size: get_size.c
-	$(CC) -static -ffreestanding -nostartfiles -s $< -o $@
+CFLAGS := -static -ffreestanding -nostartfiles -s
 
 TEST_GEN_PROGS := get_size
 
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 323383ab4581..8141d5169f4d 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -1,6 +1,7 @@
 # Makefile for vm selftests
 
 CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS)
+LDLIBS = -lrt
 TEST_GEN_FILES = compaction_test
 TEST_GEN_FILES += hugepage-mmap
 TEST_GEN_FILES += hugepage-shm
@@ -12,18 +13,12 @@ TEST_GEN_FILES += transhuge-stress
 TEST_GEN_FILES += userfaultfd
 TEST_GEN_FILES += mlock-random-test
 
-%: %.c
-	$(CC) $(CFLAGS) -o $@ $^ -lrt
-userfaultfd: userfaultfd.c ../../../../usr/include/linux/kernel.h
-	$(CC) $(CFLAGS) -O2 -o $@ $< -lpthread
-
-mlock-random-test: mlock-random-test.c
-	$(CC) $(CFLAGS) -o $@ $< -lcap
-
-../../../../usr/include/linux/kernel.h:
-	make -C ../../../.. headers_install
-
 TEST_PROGS := run_vmtests
 
 include ../lib.mk
 
+userfaultfd: LDLIBS += -lpthread ../../../../usr/include/linux/kernel.h
+mlock-random-test: LDLIBS += -lcap
+
+../../../../usr/include/linux/kernel.h:
+	make -C ../../../.. headers_install
-- 
cgit v1.2.3


From 5a2d4a5763c96a19170f81f2c4bb3cc9515f854a Mon Sep 17 00:00:00 2001
From: "bamvor.zhangjian@huawei.com" <bamvor.zhangjian@huawei.com>
Date: Tue, 29 Nov 2016 19:55:50 +0800
Subject: selftests: remove CROSS_COMPILE in dedicated Makefile

After previous clean up patches, memfd and timers could get
CROSS_COMPILE from tools/testing/selftest/lib.mk. There is no need to
preserve these definition. So, this patch remove them.

Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Bamvor Jian Zhang <bamvor.zhangjian@linaro.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/memfd/Makefile  | 1 -
 tools/testing/selftests/timers/Makefile | 1 -
 2 files changed, 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/memfd/Makefile b/tools/testing/selftests/memfd/Makefile
index 2c87f2376e59..79891d033de1 100644
--- a/tools/testing/selftests/memfd/Makefile
+++ b/tools/testing/selftests/memfd/Makefile
@@ -1,4 +1,3 @@
-CC = $(CROSS_COMPILE)gcc
 CFLAGS += -D_FILE_OFFSET_BITS=64
 CFLAGS += -I../../../../include/uapi/
 CFLAGS += -I../../../../include/
diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile
index 4a3bffed9901..b90e50c36f9f 100644
--- a/tools/testing/selftests/timers/Makefile
+++ b/tools/testing/selftests/timers/Makefile
@@ -1,4 +1,3 @@
-CC = $(CROSS_COMPILE)gcc
 BUILD_FLAGS = -DKTEST
 CFLAGS += -O3 -Wl,-no-as-needed -Wall $(BUILD_FLAGS)
 LDFLAGS += -lrt -lpthread
-- 
cgit v1.2.3


From 80d443e8876602be2c130f79c4de81e12e2a700d Mon Sep 17 00:00:00 2001
From: "bamvor.zhangjian@huawei.com" <bamvor.zhangjian@huawei.com>
Date: Tue, 29 Nov 2016 19:55:51 +0800
Subject: selftests: add EXTRA_CLEAN for clean target

Some testcases need the clean extra data after running. This patch
introduce the "EXTRA_CLEAN" variable to address this requirement.

After KBUILD_OUTPUT is enabled in later patch, it will be easy to
decide to if we need do the cleanup in the KBUILD_OUTPUT path(if the
testcase ran immediately after compiled).

Signed-off-by: Bamvor Jian Zhang <bamvor.zhangjian@linaro.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/exec/Makefile                 | 19 +++++++++----------
 tools/testing/selftests/ftrace/Makefile               |  4 +---
 tools/testing/selftests/kcmp/Makefile                 |  4 ++--
 tools/testing/selftests/lib.mk                        |  2 +-
 .../testing/selftests/powerpc/switch_endian/Makefile  |  5 ++---
 tools/testing/selftests/pstore/Makefile               |  4 +---
 tools/testing/selftests/zram/Makefile                 |  3 +--
 7 files changed, 17 insertions(+), 24 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
index 9eb1c3e4d339..48d1f863d584 100644
--- a/tools/testing/selftests/exec/Makefile
+++ b/tools/testing/selftests/exec/Makefile
@@ -1,5 +1,14 @@
 CFLAGS = -Wall
 
+TEST_GEN_PROGS := execveat
+TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir
+# Makefile is a run-time dependency, since it's accessed by the execveat test
+TEST_FILES := Makefile
+
+EXTRA_CLEAN := subdir.moved execveat.moved xxxxx*
+
+include ../lib.mk
+
 subdir:
 	mkdir -p $@
 script:
@@ -11,13 +20,3 @@ execveat.symlink: execveat
 execveat.denatured: execveat
 	cp $< $@
 	chmod -x $@
-
-TEST_GEN_PROGS := execveat
-TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir
-# Makefile is a run-time dependency, since it's accessed by the execveat test
-TEST_FILES := Makefile
-
-include ../lib.mk
-
-clean:
-	rm -rf $(TEST_GEN_PROGS) $(TEST_GEN_FILES) subdir.moved execveat.moved xxxxx*
diff --git a/tools/testing/selftests/ftrace/Makefile b/tools/testing/selftests/ftrace/Makefile
index a71530097352..6c64b421a4fd 100644
--- a/tools/testing/selftests/ftrace/Makefile
+++ b/tools/testing/selftests/ftrace/Makefile
@@ -2,8 +2,6 @@ all:
 
 TEST_PROGS := ftracetest
 TEST_FILES := test.d
+EXTRA_CLEAN := logs/*
 
 include ../lib.mk
-
-clean:
-	rm -rf logs/*
diff --git a/tools/testing/selftests/kcmp/Makefile b/tools/testing/selftests/kcmp/Makefile
index 9e8b673d41b1..74a8adda58fb 100644
--- a/tools/testing/selftests/kcmp/Makefile
+++ b/tools/testing/selftests/kcmp/Makefile
@@ -2,7 +2,7 @@ CFLAGS += -I../../../../usr/include/
 
 TEST_GEN_PROGS := kcmp_test
 
+EXTRA_CLEAN := kcmp-test-file
+
 include ../lib.mk
 
-clean:
-	$(RM) $(TEST_GEN_PROGS) kcmp-test-file
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 68d7c01b9f06..0f7a371beff7 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -43,7 +43,7 @@ emit_tests:
 all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
 
 clean:
-	$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
+	$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
 
 %: %.c
 	$(CC) $(CFLAGS) $(LDFLAGS) $(LDLIBS) -o $@ $^
diff --git a/tools/testing/selftests/powerpc/switch_endian/Makefile b/tools/testing/selftests/powerpc/switch_endian/Makefile
index bd0122306da6..dbd05ac8dbf7 100644
--- a/tools/testing/selftests/powerpc/switch_endian/Makefile
+++ b/tools/testing/selftests/powerpc/switch_endian/Makefile
@@ -2,6 +2,8 @@ TEST_GEN_PROGS := switch_endian_test
 
 ASFLAGS += -O2 -Wall -g -nostdlib -m64
 
+EXTRA_CLEAN = *.o check-reversed.S
+
 include ../../lib.mk
 
 switch_endian_test: check-reversed.S
@@ -11,6 +13,3 @@ check-reversed.o: check.o
 
 check-reversed.S: check-reversed.o
 	hexdump -v -e '/1 ".byte 0x%02X\n"' $< > $@
-
-clean:
-	$(RM) $(TEST_GEN_PROGS) *.o check-reversed.S
diff --git a/tools/testing/selftests/pstore/Makefile b/tools/testing/selftests/pstore/Makefile
index bd7abe24ea08..c5f2440ba1f7 100644
--- a/tools/testing/selftests/pstore/Makefile
+++ b/tools/testing/selftests/pstore/Makefile
@@ -5,11 +5,9 @@ all:
 
 TEST_PROGS := pstore_tests pstore_post_reboot_tests
 TEST_FILES := common_tests pstore_crash_test
+EXTRA_CLEAN := logs/* *uuid
 
 include ../lib.mk
 
 run_crash:
 	@sh pstore_crash_test || { echo "pstore_crash_test: [FAIL]"; exit 1; }
-
-clean:
-	rm -rf logs/* *uuid
diff --git a/tools/testing/selftests/zram/Makefile b/tools/testing/selftests/zram/Makefile
index 29d80346e3eb..c3a87e5f9d36 100644
--- a/tools/testing/selftests/zram/Makefile
+++ b/tools/testing/selftests/zram/Makefile
@@ -2,8 +2,7 @@ all:
 
 TEST_PROGS := zram.sh
 TEST_FILES := zram01.sh zram02.sh zram_lib.sh
+EXTRA_CLEAN := err.log
 
 include ../lib.mk
 
-clean:
-	$(RM) err.log
-- 
cgit v1.2.3


From a8ba798bc8ec663cf02e80b0dd770324de9bafd9 Mon Sep 17 00:00:00 2001
From: "bamvor.zhangjian@huawei.com" <bamvor.zhangjian@huawei.com>
Date: Tue, 29 Nov 2016 19:55:52 +0800
Subject: selftests: enable O and KBUILD_OUTPUT

Enable O and KBUILD_OUTPUT for kselftest. User could compile kselftest
to another directory by passing O or KBUILD_OUTPUT. And O is high
priority than KBUILD_OUTPUT.

Signed-off-by: Bamvor Jian Zhang <bamvor.zhangjian@linaro.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/Makefile                   | 36 ++++++++++++++++------
 tools/testing/selftests/exec/Makefile              | 13 ++++----
 tools/testing/selftests/ftrace/Makefile            |  2 +-
 tools/testing/selftests/futex/Makefile             | 21 ++++++++++---
 tools/testing/selftests/kcmp/Makefile              |  2 +-
 tools/testing/selftests/lib.mk                     | 19 +++++++++---
 tools/testing/selftests/powerpc/Makefile           | 15 +++++----
 .../testing/selftests/powerpc/benchmarks/Makefile  |  6 ++--
 tools/testing/selftests/powerpc/copyloops/Makefile |  8 ++---
 tools/testing/selftests/powerpc/dscr/Makefile      |  2 +-
 tools/testing/selftests/powerpc/math/Makefile      | 12 ++++----
 tools/testing/selftests/powerpc/mm/Makefile        |  6 ++--
 tools/testing/selftests/powerpc/pmu/Makefile       | 16 +++++-----
 tools/testing/selftests/powerpc/pmu/ebb/Makefile   |  4 +--
 .../selftests/powerpc/switch_endian/Makefile       |  8 ++---
 tools/testing/selftests/powerpc/tm/Makefile        |  6 ++--
 tools/testing/selftests/vm/Makefile                |  4 +--
 tools/testing/selftests/x86/Makefile               | 17 +++++-----
 18 files changed, 122 insertions(+), 75 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 71b05891a6a1..2741f9e35215 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -49,29 +49,44 @@ override LDFLAGS =
 override MAKEFLAGS =
 endif
 
+BUILD := $(O)
+ifndef BUILD
+  BUILD := $(KBUILD_OUTPUT)
+endif
+ifndef BUILD
+  BUILD := $(shell pwd)
+endif
+
+export BUILD
 all:
-	for TARGET in $(TARGETS); do \
-		make -C $$TARGET; \
+	for TARGET in $(TARGETS); do		\
+		BUILD_TARGET=$$BUILD/$$TARGET;	\
+		mkdir $$BUILD_TARGET  -p;	\
+		make OUTPUT=$$BUILD_TARGET -C $$TARGET;\
 	done;
 
 run_tests: all
 	for TARGET in $(TARGETS); do \
-		make -C $$TARGET run_tests; \
+		BUILD_TARGET=$$BUILD/$$TARGET;	\
+		make OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\
 	done;
 
 hotplug:
 	for TARGET in $(TARGETS_HOTPLUG); do \
-		make -C $$TARGET; \
+		BUILD_TARGET=$$BUILD/$$TARGET;	\
+		make OUTPUT=$$BUILD_TARGET -C $$TARGET;\
 	done;
 
 run_hotplug: hotplug
 	for TARGET in $(TARGETS_HOTPLUG); do \
-		make -C $$TARGET run_full_test; \
+		BUILD_TARGET=$$BUILD/$$TARGET;	\
+		make OUTPUT=$$BUILD_TARGET -C $$TARGET run_full_test;\
 	done;
 
 clean_hotplug:
 	for TARGET in $(TARGETS_HOTPLUG); do \
-		make -C $$TARGET clean; \
+		BUILD_TARGET=$$BUILD/$$TARGET;	\
+		make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
 	done;
 
 run_pstore_crash:
@@ -86,7 +101,8 @@ ifdef INSTALL_PATH
 	@# Ask all targets to install their files
 	mkdir -p $(INSTALL_PATH)
 	for TARGET in $(TARGETS); do \
-		make -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \
+		BUILD_TARGET=$$BUILD/$$TARGET;	\
+		make OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \
 	done;
 
 	@# Ask all targets to emit their test scripts
@@ -95,10 +111,11 @@ ifdef INSTALL_PATH
 	echo "ROOT=\$$PWD" >> $(ALL_SCRIPT)
 
 	for TARGET in $(TARGETS); do \
+		BUILD_TARGET=$$BUILD/$$TARGET;	\
 		echo "echo ; echo Running tests in $$TARGET" >> $(ALL_SCRIPT); \
 		echo "echo ========================================" >> $(ALL_SCRIPT); \
 		echo "cd $$TARGET" >> $(ALL_SCRIPT); \
-		make -s --no-print-directory -C $$TARGET emit_tests >> $(ALL_SCRIPT); \
+		make -s --no-print-directory OUTPUT=$$BUILD_TARGET -C $$TARGET emit_tests >> $(ALL_SCRIPT); \
 		echo "cd \$$ROOT" >> $(ALL_SCRIPT); \
 	done;
 
@@ -109,7 +126,8 @@ endif
 
 clean:
 	for TARGET in $(TARGETS); do \
-		make -C $$TARGET clean; \
+		BUILD_TARGET=$$BUILD/$$TARGET;	\
+		make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
 	done;
 
 .PHONY: install
diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
index 48d1f863d584..2e13035dff7f 100644
--- a/tools/testing/selftests/exec/Makefile
+++ b/tools/testing/selftests/exec/Makefile
@@ -5,18 +5,19 @@ TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir
 # Makefile is a run-time dependency, since it's accessed by the execveat test
 TEST_FILES := Makefile
 
-EXTRA_CLEAN := subdir.moved execveat.moved xxxxx*
+EXTRA_CLEAN := $(OUTPUT)/subdir.moved $(OUTPUT)/execveat.moved $(OUTPUT)/xxxxx*
 
 include ../lib.mk
 
-subdir:
+$(OUTPUT)/subdir:
 	mkdir -p $@
-script:
+$(OUTPUT)/script:
 	echo '#!/bin/sh' > $@
 	echo 'exit $$*' >> $@
 	chmod +x $@
-execveat.symlink: execveat
-	ln -s -f $< $@
-execveat.denatured: execveat
+$(OUTPUT)/execveat.symlink: $(OUTPUT)/execveat
+	cd $(OUTPUT) && ln -s -f $(shell basename $<) $(shell basename $@)
+$(OUTPUT)/execveat.denatured: $(OUTPUT)/execveat
 	cp $< $@
 	chmod -x $@
+
diff --git a/tools/testing/selftests/ftrace/Makefile b/tools/testing/selftests/ftrace/Makefile
index 6c64b421a4fd..a8a5e21850e7 100644
--- a/tools/testing/selftests/ftrace/Makefile
+++ b/tools/testing/selftests/ftrace/Makefile
@@ -2,6 +2,6 @@ all:
 
 TEST_PROGS := ftracetest
 TEST_FILES := test.d
-EXTRA_CLEAN := logs/*
+EXTRA_CLEAN := $(OUTPUT)/logs/*
 
 include ../lib.mk
diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile
index 6a1752956283..653c5cd9e44d 100644
--- a/tools/testing/selftests/futex/Makefile
+++ b/tools/testing/selftests/futex/Makefile
@@ -3,13 +3,18 @@ SUBDIRS := functional
 TEST_PROGS := run.sh
 
 .PHONY: all clean
-all:
-	for DIR in $(SUBDIRS); do $(MAKE) -C $$DIR $@ ; done
 
 include ../lib.mk
 
+all:
+	for DIR in $(SUBDIRS); do		\
+		BUILD_TARGET=$$OUTPUT/$$DIR;	\
+		mkdir $$BUILD_TARGET  -p;	\
+		make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+	done
+
 override define RUN_TESTS
-	./run.sh
+	@if [ `dirname $(OUTPUT)` = $(PWD) ]; then ./run.sh; fi
 endef
 
 override define INSTALL_RULE
@@ -17,7 +22,9 @@ override define INSTALL_RULE
 	install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)
 
 	@for SUBDIR in $(SUBDIRS); do \
-		$(MAKE) -C $$SUBDIR INSTALL_PATH=$(INSTALL_PATH)/$$SUBDIR install; \
+		BUILD_TARGET=$$OUTPUT/$$SUBDIR;	\
+		mkdir $$BUILD_TARGET  -p;	\
+		$(MAKE) OUTPUT=$$BUILD_TARGET -C $$SUBDIR INSTALL_PATH=$(INSTALL_PATH)/$$SUBDIR install; \
 	done;
 endef
 
@@ -26,4 +33,8 @@ override define EMIT_TESTS
 endef
 
 clean:
-	for DIR in $(SUBDIRS); do $(MAKE) -C $$DIR $@ ; done
+	for DIR in $(SUBDIRS); do		\
+		BUILD_TARGET=$$OUTPUT/$$DIR;	\
+		mkdir $$BUILD_TARGET  -p;	\
+		make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+	done
diff --git a/tools/testing/selftests/kcmp/Makefile b/tools/testing/selftests/kcmp/Makefile
index 74a8adda58fb..47aa9887f9d4 100644
--- a/tools/testing/selftests/kcmp/Makefile
+++ b/tools/testing/selftests/kcmp/Makefile
@@ -2,7 +2,7 @@ CFLAGS += -I../../../../usr/include/
 
 TEST_GEN_PROGS := kcmp_test
 
-EXTRA_CLEAN := kcmp-test-file
+EXTRA_CLEAN := $(OUTPUT)/kcmp-test-file
 
 include ../lib.mk
 
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 0f7a371beff7..01bb7782a35e 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -4,7 +4,8 @@ CC := $(CROSS_COMPILE)gcc
 
 define RUN_TESTS
 	@for TEST in $(TEST_GEN_PROGS) $(TEST_PROGS); do \
-		(./$$TEST && echo "selftests: $$TEST [PASS]") || echo "selftests: $$TEST [FAIL]"; \
+		BASENAME_TEST=`basename $$TEST`;	\
+		cd `dirname $$TEST`; (./$$BASENAME_TEST && echo "selftests: $$BASENAME_TEST [PASS]") || echo "selftests:  $$BASENAME_TEST [FAIL]"; cd -;\
 	done;
 endef
 
@@ -33,19 +34,29 @@ endif
 
 define EMIT_TESTS
 	@for TEST in $(TEST_GEN_PROGS) $(TEST_PROGS); do \
-		echo "(./$$TEST && echo \"selftests: $$TEST [PASS]\") || echo \"selftests: $$TEST [FAIL]\""; \
+		BASENAME_TEST=`basename $$TEST`;	\
+		echo "(./$$BASENAME_TEST && echo \"selftests: $$BASENAME_TEST [PASS]\") || echo \"selftests: $$BASENAME_TEST [FAIL]\""; \
 	done;
 endef
 
 emit_tests:
 	$(EMIT_TESTS)
 
+TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS))
+TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES))
+
 all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
 
 clean:
 	$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
 
-%: %.c
-	$(CC) $(CFLAGS) $(LDFLAGS) $(LDLIBS) -o $@ $^
+$(OUTPUT)/%:%.c
+	$(CC) $(CFLAGS) $(LDFLAGS) $(LDLIBS) $< -o $@
+
+$(OUTPUT)/%.o:%.S
+	$(CC) $(ASFLAGS) -c $< -o $@
+
+$(OUTPUT)/%:%.S
+	$(CC) $(ASFLAGS) $< -o $@
 
 .PHONY: run_tests all clean install emit_tests
diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
index c2c4211ba58b..1d48c0cab596 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -34,32 +34,35 @@ endif
 all: $(SUB_DIRS)
 
 $(SUB_DIRS):
-	$(MAKE) -k -C $@ all
+	BUILD_TARGET=$$OUTPUT/$@; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $@ all
 
 include ../lib.mk
 
 override define RUN_TESTS
 	@for TARGET in $(SUB_DIRS); do \
-		$(MAKE) -C $$TARGET run_tests; \
+		BUILD_TARGET=$$OUTPUT/$$TARGET;	\
+		$(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\
 	done;
 endef
 
 override define INSTALL_RULE
 	@for TARGET in $(SUB_DIRS); do \
-		$(MAKE) -C $$TARGET install; \
+		BUILD_TARGET=$$OUTPUT/$$TARGET;	\
+		$(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install;\
 	done;
 endef
 
 override define EMIT_TESTS
 	@for TARGET in $(SUB_DIRS); do \
-		$(MAKE) -s -C $$TARGET emit_tests; \
+		BUILD_TARGET=$$OUTPUT/$$TARGET;	\
+		$(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests;\
 	done;
 endef
 
 clean:
 	@for TARGET in $(SUB_DIRS); do \
-		$(MAKE) -C $$TARGET clean; \
-	done;
+		BUILD_TARGET=$$OUTPUT/$$TARGET;	\
+		$(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\ done;
 	rm -f tags
 
 tags:
diff --git a/tools/testing/selftests/powerpc/benchmarks/Makefile b/tools/testing/selftests/powerpc/benchmarks/Makefile
index 08a55bd17800..286c6ed2378c 100644
--- a/tools/testing/selftests/powerpc/benchmarks/Makefile
+++ b/tools/testing/selftests/powerpc/benchmarks/Makefile
@@ -6,6 +6,6 @@ $(TEST_GEN_PROGS): ../harness.c
 
 include ../../lib.mk
 
-context_switch: ../utils.c
-context_switch: CFLAGS += -maltivec -mvsx -mabi=altivec
-context_switch: LDLIBS += -lpthread
+$(OUTPUT)/context_switch: ../utils.c
+$(OUTPUT)/context_switch: CFLAGS += -maltivec -mvsx -mabi=altivec
+$(OUTPUT)/context_switch: LDLIBS += -lpthread
diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile
index 9ad1558e2ea9..681ab19d0a84 100644
--- a/tools/testing/selftests/powerpc/copyloops/Makefile
+++ b/tools/testing/selftests/powerpc/copyloops/Makefile
@@ -12,9 +12,9 @@ EXTRA_SOURCES := validate.c ../harness.c
 
 include ../../lib.mk
 
-copyuser_64:     CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_base
-copyuser_power7: CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_power7
-memcpy_64:       CPPFLAGS += -D COPY_LOOP=test_memcpy
-memcpy_power7:   CPPFLAGS += -D COPY_LOOP=test_memcpy_power7
+$(OUTPUT)/copyuser_64:     CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_base
+$(OUTPUT)/copyuser_power7: CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_power7
+$(OUTPUT)/memcpy_64:       CPPFLAGS += -D COPY_LOOP=test_memcpy
+$(OUTPUT)/memcpy_power7:   CPPFLAGS += -D COPY_LOOP=test_memcpy_power7
 
 $(TEST_GEN_PROGS): $(EXTRA_SOURCES)
diff --git a/tools/testing/selftests/powerpc/dscr/Makefile b/tools/testing/selftests/powerpc/dscr/Makefile
index 4262de42017b..c5639deb8887 100644
--- a/tools/testing/selftests/powerpc/dscr/Makefile
+++ b/tools/testing/selftests/powerpc/dscr/Makefile
@@ -4,6 +4,6 @@ TEST_GEN_PROGS := dscr_default_test dscr_explicit_test dscr_user_test	\
 
 include ../../lib.mk
 
-dscr_default_test: LDLIBS += -lpthread
+$(OUTPUT)/dscr_default_test: LDLIBS += -lpthread
 
 $(TEST_GEN_PROGS): ../harness.c
diff --git a/tools/testing/selftests/powerpc/math/Makefile b/tools/testing/selftests/powerpc/math/Makefile
index 814c38591b13..da9f42feaaac 100644
--- a/tools/testing/selftests/powerpc/math/Makefile
+++ b/tools/testing/selftests/powerpc/math/Makefile
@@ -5,13 +5,13 @@ include ../../lib.mk
 $(TEST_GEN_PROGS): ../harness.c
 $(TEST_GEN_PROGS): CFLAGS += -O2 -g -pthread -m64 -maltivec
 
-fpu_syscall: fpu_asm.S
-fpu_preempt: fpu_asm.S
-fpu_signal:  fpu_asm.S
+$(OUTPUT)/pu_syscall: fpu_asm.S
+$(OUTPUT)/pu_preempt: fpu_asm.S
+$(OUTPUT)/pu_signal:  fpu_asm.S
 
-vmx_syscall: vmx_asm.S
-vmx_preempt: vmx_asm.S
-vmx_signal: vmx_asm.S
+$(OUTPUT)/mx_syscall: vmx_asm.S
+$(OUTPUT)/mx_preempt: vmx_asm.S
+$(OUTPUT)/mx_signal: vmx_asm.S
 
 vsx_preempt: CFLAGS += -mvsx
 vsx_preempt: vsx_asm.S
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
index d5633783c82e..1cffe54dccfb 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -8,8 +8,8 @@ include ../../lib.mk
 
 $(TEST_GEN_PROGS): ../harness.c
 
-prot_sao: ../utils.c
+$(OUTPUT)/prot_sao: ../utils.c
 
-tempfile:
-	dd if=/dev/zero of=tempfile bs=64k count=1
+$(OUTPUT)/tempfile:
+	dd if=/dev/zero of=$@ bs=64k count=1
 
diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile
index ab0f9026c21c..097b08acd867 100644
--- a/tools/testing/selftests/powerpc/pmu/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/Makefile
@@ -11,34 +11,34 @@ all: $(TEST_GEN_PROGS) ebb
 $(TEST_GEN_PROGS): $(EXTRA_SOURCES)
 
 # loop.S can only be built 64-bit
-count_instructions: loop.S count_instructions.c $(EXTRA_SOURCES)
+$(OUTPUT)/count_instructions: loop.S count_instructions.c $(EXTRA_SOURCES)
 	$(CC) $(CFLAGS) -m64 -o $@ $^
 
-per_event_excludes: ../utils.c
+$(OUTPUT)/per_event_excludes: ../utils.c
 
 DEFAULT_RUN_TESTS := $(RUN_TESTS)
 override define RUN_TESTS
 	$(DEFAULT_RUN_TESTS)
-	$(MAKE) -C ebb run_tests
+	TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests
 endef
 
 DEFAULT_EMIT_TESTS := $(EMIT_TESTS)
 override define EMIT_TESTS
 	$(DEFAULT_EMIT_TESTS)
-	$(MAKE) -s -C ebb emit_tests
+	TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests
 endef
 
 DEFAULT_INSTALL_RULE := $(INSTALL_RULE)
 override define INSTALL_RULE
 	$(DEFAULT_INSTALL_RULE)
-	$(MAKE) -C ebb install
+	TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET: -C $$TARGET install
 endef
 
 clean:
-	$(RM) $(TEST_PROGS) loop.o
-	$(MAKE) -C ebb clean
+	$(RM) $(TEST_GEN_PROGS) $(OUTPUT)/loop.o
+	TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET: -C $$TARGET clean
 
 ebb:
-	$(MAKE) -k -C $@ all
+	TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all
 
 .PHONY: all run_tests clean ebb
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
index 0bc2bd6db511..6001fb0a377a 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
@@ -21,6 +21,6 @@ include ../../../lib.mk
 $(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c \
 	       ebb.c ebb_handler.S trace.c busy_loop.S
 
-instruction_count_test: ../loop.S
+$(OUTPUT)/instruction_count_test: ../loop.S
 
-lost_exception_test: ../lib.c
+$(OUTPUT)/lost_exception_test: ../lib.c
diff --git a/tools/testing/selftests/powerpc/switch_endian/Makefile b/tools/testing/selftests/powerpc/switch_endian/Makefile
index dbd05ac8dbf7..b92c2a132c4f 100644
--- a/tools/testing/selftests/powerpc/switch_endian/Makefile
+++ b/tools/testing/selftests/powerpc/switch_endian/Makefile
@@ -2,14 +2,14 @@ TEST_GEN_PROGS := switch_endian_test
 
 ASFLAGS += -O2 -Wall -g -nostdlib -m64
 
-EXTRA_CLEAN = *.o check-reversed.S
+EXTRA_CLEAN = $(OUTPUT)/*.o $(OUTPUT)/check-reversed.S
 
 include ../../lib.mk
 
-switch_endian_test: check-reversed.S
+$(OUTPUT)/switch_endian_test: $(OUTPUT)/check-reversed.S
 
-check-reversed.o: check.o
+$(OUTPUT)/check-reversed.o: $(OUTPUT)/check.o
 	$(CROSS_COMPILE)objcopy -j .text --reverse-bytes=4 -O binary $< $@
 
-check-reversed.S: check-reversed.o
+$(OUTPUT)/check-reversed.S: $(OUTPUT)/check-reversed.o
 	hexdump -v -e '/1 ".byte 0x%02X\n"' $< > $@
diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile
index 117c6247928a..07da21769ff8 100644
--- a/tools/testing/selftests/powerpc/tm/Makefile
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -10,9 +10,9 @@ $(TEST_GEN_PROGS): ../harness.c ../utils.c
 
 CFLAGS += -mhtm
 
-tm-syscall: tm-syscall-asm.S
-tm-syscall: CFLAGS += -I../../../../../usr/include
-tm-tmspr: CFLAGS += -pthread
+$(OUTPUT)/tm-syscall: tm-syscall-asm.S
+$(OUTPUT)/tm-syscall: CFLAGS += -I../../../../../usr/include
+$(OUTPUT)/tm-tmspr: CFLAGS += -pthread
 
 $(SIGNAL_CONTEXT_CHK_TESTS): tm-signal.S
 $(SIGNAL_CONTEXT_CHK_TESTS): CFLAGS += -mhtm -m64 -mvsx
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 8141d5169f4d..983140e68661 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -17,8 +17,8 @@ TEST_PROGS := run_vmtests
 
 include ../lib.mk
 
-userfaultfd: LDLIBS += -lpthread ../../../../usr/include/linux/kernel.h
-mlock-random-test: LDLIBS += -lcap
+$(OUTPUT)/userfaultfd: LDLIBS += -lpthread ../../../../usr/include/linux/kernel.h
+$(OUTPUT)/mlock-random-test: LDLIBS += -lcap
 
 ../../../../usr/include/linux/kernel.h:
 	make -C ../../../.. headers_install
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 8c1cb423cfe6..f251cf9e62f0 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -17,6 +17,9 @@ TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY)
 BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32)
 BINARIES_64 := $(TARGETS_C_64BIT_ALL:%=%_64)
 
+BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32))
+BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))
+
 CFLAGS := -O2 -g -std=gnu99 -pthread -Wall
 
 UNAME_M := $(shell uname -m)
@@ -40,10 +43,10 @@ all_64: $(BINARIES_64)
 clean:
 	$(RM) $(BINARIES_32) $(BINARIES_64)
 
-$(TARGETS_C_32BIT_ALL:%=%_32): %_32: %.c
+$(BINARIES_32): $(OUTPUT)/%_32: %.c
 	$(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl -lm
 
-$(TARGETS_C_64BIT_ALL:%=%_64): %_64: %.c
+$(BINARIES_64): $(OUTPUT)/%_64: %.c
 	$(CC) -m64 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl
 
 # x86_64 users should be encouraged to install 32-bit libraries
@@ -65,12 +68,12 @@ warn_32bit_failure:
 endif
 
 # Some tests have additional dependencies.
-sysret_ss_attrs_64: thunks.S
-ptrace_syscall_32: raw_syscall_helper_32.S
-test_syscall_vdso_32: thunks_32.S
+$(OUTPUT)/sysret_ss_attrs_64: thunks.S
+$(OUTPUT)/ptrace_syscall_32: raw_syscall_helper_32.S
+$(OUTPUT)/test_syscall_vdso_32: thunks_32.S
 
 # check_initial_reg_state is special: it needs a custom entry, and it
 # needs to be static so that its interpreter doesn't destroy its initial
 # state.
-check_initial_reg_state_32: CFLAGS += -Wl,-ereal_start -static
-check_initial_reg_state_64: CFLAGS += -Wl,-ereal_start -static
+$(OUTPUT)/check_initial_reg_state_32: CFLAGS += -Wl,-ereal_start -static
+$(OUTPUT)/check_initial_reg_state_64: CFLAGS += -Wl,-ereal_start -static
-- 
cgit v1.2.3


From 5722569bb9c3bd922c4f10b5b2912fe88c255312 Mon Sep 17 00:00:00 2001
From: Gianluca Borello <g.borello@gmail.com>
Date: Mon, 9 Jan 2017 10:19:47 -0800
Subject: bpf: allow helpers access to map element values

Enable helpers to directly access a map element value by passing a
register type PTR_TO_MAP_VALUE (or PTR_TO_MAP_VALUE_ADJ) to helper
arguments ARG_PTR_TO_STACK or ARG_PTR_TO_RAW_STACK.

This enables several use cases. For example, a typical tracing program
might want to capture pathnames passed to sys_open() with:

struct trace_data {
	char pathname[PATHLEN];
};

SEC("kprobe/sys_open")
void bpf_sys_open(struct pt_regs *ctx)
{
	struct trace_data data;
	bpf_probe_read(data.pathname, sizeof(data.pathname), ctx->di);

	/* consume data.pathname, for example via
	 * bpf_trace_printk() or bpf_perf_event_output()
	 */
}

Such a program could easily hit the stack limit in case PATHLEN needs to
be large or more local variables need to exist, both of which are quite
common scenarios. Allowing direct helper access to map element values,
one could do:

struct bpf_map_def SEC("maps") scratch_map = {
	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
	.key_size = sizeof(u32),
	.value_size = sizeof(struct trace_data),
	.max_entries = 1,
};

SEC("kprobe/sys_open")
int bpf_sys_open(struct pt_regs *ctx)
{
	int id = 0;
	struct trace_data *p = bpf_map_lookup_elem(&scratch_map, &id);
	if (!p)
		return;
	bpf_probe_read(p->pathname, sizeof(p->pathname), ctx->di);

	/* consume p->pathname, for example via
	 * bpf_trace_printk() or bpf_perf_event_output()
	 */
}

And wouldn't risk exhausting the stack.

Code changes are loosely modeled after commit 6841de8b0d03 ("bpf: allow
helpers access the packet directly"). Unlike with PTR_TO_PACKET, these
changes just work with ARG_PTR_TO_STACK and ARG_PTR_TO_RAW_STACK (not
ARG_PTR_TO_MAP_KEY, ARG_PTR_TO_MAP_VALUE, ...): adding those would be
trivial, but since there is not currently a use case for that, it's
reasonable to limit the set of changes.

Also, add new tests to make sure accesses to map element values from
helpers never go out of boundary, even when adjusted.

Signed-off-by: Gianluca Borello <g.borello@gmail.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/verifier.c                       |   9 +-
 tools/testing/selftests/bpf/test_verifier.c | 491 ++++++++++++++++++++++++++++
 2 files changed, 498 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8333fbcfbfe7..b7014606745b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -627,7 +627,7 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
 {
 	struct bpf_map *map = env->cur_state.regs[regno].map_ptr;
 
-	if (off < 0 || off + size > map->value_size) {
+	if (off < 0 || size <= 0 || off + size > map->value_size) {
 		verbose("invalid access to map value, value_size=%d off=%d size=%d\n",
 			map->value_size, off, size);
 		return -EACCES;
@@ -1025,7 +1025,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 		 */
 		if (type == CONST_IMM && reg->imm == 0)
 			/* final test in check_stack_boundary() */;
-		else if (type != PTR_TO_PACKET && type != expected_type)
+		else if (type != PTR_TO_PACKET && type != PTR_TO_MAP_VALUE &&
+			 type != PTR_TO_MAP_VALUE_ADJ && type != expected_type)
 			goto err_type;
 		meta->raw_mode = arg_type == ARG_PTR_TO_RAW_STACK;
 	} else {
@@ -1088,6 +1089,10 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 		}
 		if (regs[regno - 1].type == PTR_TO_PACKET)
 			err = check_packet_access(env, regno - 1, 0, reg->imm);
+		else if (regs[regno - 1].type == PTR_TO_MAP_VALUE)
+			err = check_map_access(env, regno - 1, 0, reg->imm);
+		else if (regs[regno - 1].type == PTR_TO_MAP_VALUE_ADJ)
+			err = check_map_access_adj(env, regno - 1, 0, reg->imm);
 		else
 			err = check_stack_boundary(env, regno - 1, reg->imm,
 						   zero_size_allowed, meta);
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 853d7e43434a..b7732e557bf9 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -2905,6 +2905,497 @@ static struct bpf_test tests[] = {
 		.result = REJECT,
 		.errstr = "invalid bpf_context access",
 	},
+	{
+		"helper access to map: full range",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to map: partial range",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_2, 8),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to map: empty range",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "invalid access to map value, value_size=48 off=0 size=0",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to map: out-of-bound range",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val) + 8),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "invalid access to map value, value_size=48 off=0 size=56",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to map: negative range",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_2, -8),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "invalid access to map value, value_size=48 off=0 size=-8",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to adjusted map (via const imm): full range",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1,
+				offsetof(struct test_val, foo)),
+			BPF_MOV64_IMM(BPF_REG_2,
+				sizeof(struct test_val) -
+				offsetof(struct test_val, foo)),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to adjusted map (via const imm): partial range",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1,
+				offsetof(struct test_val, foo)),
+			BPF_MOV64_IMM(BPF_REG_2, 8),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to adjusted map (via const imm): empty range",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1,
+				offsetof(struct test_val, foo)),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "R1 min value is outside of the array range",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to adjusted map (via const imm): out-of-bound range",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1,
+				offsetof(struct test_val, foo)),
+			BPF_MOV64_IMM(BPF_REG_2,
+				sizeof(struct test_val) -
+				offsetof(struct test_val, foo) + 8),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "invalid access to map value, value_size=48 off=4 size=52",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to adjusted map (via const imm): negative range (> adjustment)",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1,
+				offsetof(struct test_val, foo)),
+			BPF_MOV64_IMM(BPF_REG_2, -8),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "invalid access to map value, value_size=48 off=4 size=-8",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to adjusted map (via const imm): negative range (< adjustment)",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1,
+				offsetof(struct test_val, foo)),
+			BPF_MOV64_IMM(BPF_REG_2, -1),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "R1 min value is outside of the array range",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to adjusted map (via const reg): full range",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_3,
+				offsetof(struct test_val, foo)),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_MOV64_IMM(BPF_REG_2,
+				sizeof(struct test_val) -
+				offsetof(struct test_val, foo)),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to adjusted map (via const reg): partial range",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_3,
+				offsetof(struct test_val, foo)),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_MOV64_IMM(BPF_REG_2, 8),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to adjusted map (via const reg): empty range",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "R1 min value is outside of the array range",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to adjusted map (via const reg): out-of-bound range",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_3,
+				offsetof(struct test_val, foo)),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_MOV64_IMM(BPF_REG_2,
+				sizeof(struct test_val) -
+				offsetof(struct test_val, foo) + 8),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "invalid access to map value, value_size=48 off=4 size=52",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to adjusted map (via const reg): negative range (> adjustment)",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_3,
+				offsetof(struct test_val, foo)),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_MOV64_IMM(BPF_REG_2, -8),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "invalid access to map value, value_size=48 off=4 size=-8",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to adjusted map (via const reg): negative range (< adjustment)",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_3,
+				offsetof(struct test_val, foo)),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_MOV64_IMM(BPF_REG_2, -1),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "R1 min value is outside of the array range",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to adjusted map (via variable): full range",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_3,
+				offsetof(struct test_val, foo), 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_MOV64_IMM(BPF_REG_2,
+				sizeof(struct test_val) -
+				offsetof(struct test_val, foo)),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to adjusted map (via variable): partial range",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_3,
+				offsetof(struct test_val, foo), 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_MOV64_IMM(BPF_REG_2, 8),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to adjusted map (via variable): empty range",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_3,
+				offsetof(struct test_val, foo), 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "R1 min value is outside of the array range",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to adjusted map (via variable): no max check",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "R1 min value is negative, either use unsigned index or do a if (index >=0) check",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to adjusted map (via variable): wrong max check",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_3,
+				offsetof(struct test_val, foo), 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_MOV64_IMM(BPF_REG_2,
+				sizeof(struct test_val) -
+				offsetof(struct test_val, foo) + 1),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "invalid access to map value, value_size=48 off=4 size=45",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)
-- 
cgit v1.2.3


From f0318d01b694485af9678a4e120328ae3555be6d Mon Sep 17 00:00:00 2001
From: Gianluca Borello <g.borello@gmail.com>
Date: Mon, 9 Jan 2017 10:19:48 -0800
Subject: bpf: allow adjusted map element values to spill

commit 484611357c19 ("bpf: allow access into map value arrays")
introduces the ability to do pointer math inside a map element value via
the PTR_TO_MAP_VALUE_ADJ register type.

The current support doesn't handle the case where a PTR_TO_MAP_VALUE_ADJ
is spilled into the stack, limiting several use cases, especially when
generating bpf code from a compiler.

Handle this case by explicitly enabling the register type
PTR_TO_MAP_VALUE_ADJ to be spilled. Also, make sure that min_value and
max_value are reset just for BPF_LDX operations that don't result in a
restore of a spilled register from stack.

Signed-off-by: Gianluca Borello <g.borello@gmail.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/verifier.c                       | 21 +++++++++----
 tools/testing/selftests/bpf/test_verifier.c | 46 +++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+), 5 deletions(-)

(limited to 'tools/testing')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index b7014606745b..59ed07b9b4ea 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -481,6 +481,13 @@ static void reset_reg_range_values(struct bpf_reg_state *regs, u32 regno)
 	regs[regno].max_value = BPF_REGISTER_MAX_RANGE;
 }
 
+static void mark_reg_unknown_value_and_range(struct bpf_reg_state *regs,
+					     u32 regno)
+{
+	mark_reg_unknown_value(regs, regno);
+	reset_reg_range_values(regs, regno);
+}
+
 enum reg_arg_type {
 	SRC_OP,		/* register is used as source operand */
 	DST_OP,		/* register is used as destination operand */
@@ -532,6 +539,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
 	switch (type) {
 	case PTR_TO_MAP_VALUE:
 	case PTR_TO_MAP_VALUE_OR_NULL:
+	case PTR_TO_MAP_VALUE_ADJ:
 	case PTR_TO_STACK:
 	case PTR_TO_CTX:
 	case PTR_TO_PACKET:
@@ -616,7 +624,8 @@ static int check_stack_read(struct bpf_verifier_state *state, int off, int size,
 		}
 		if (value_regno >= 0)
 			/* have read misc data from the stack */
-			mark_reg_unknown_value(state->regs, value_regno);
+			mark_reg_unknown_value_and_range(state->regs,
+							 value_regno);
 		return 0;
 	}
 }
@@ -825,7 +834,8 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
 		else
 			err = check_map_access(env, regno, off, size);
 		if (!err && t == BPF_READ && value_regno >= 0)
-			mark_reg_unknown_value(state->regs, value_regno);
+			mark_reg_unknown_value_and_range(state->regs,
+							 value_regno);
 
 	} else if (reg->type == PTR_TO_CTX) {
 		enum bpf_reg_type reg_type = UNKNOWN_VALUE;
@@ -837,7 +847,8 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
 		}
 		err = check_ctx_access(env, off, size, t, &reg_type);
 		if (!err && t == BPF_READ && value_regno >= 0) {
-			mark_reg_unknown_value(state->regs, value_regno);
+			mark_reg_unknown_value_and_range(state->regs,
+							 value_regno);
 			/* note that reg.[id|off|range] == 0 */
 			state->regs[value_regno].type = reg_type;
 		}
@@ -870,7 +881,8 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
 		}
 		err = check_packet_access(env, regno, off, size);
 		if (!err && t == BPF_READ && value_regno >= 0)
-			mark_reg_unknown_value(state->regs, value_regno);
+			mark_reg_unknown_value_and_range(state->regs,
+							 value_regno);
 	} else {
 		verbose("R%d invalid mem access '%s'\n",
 			regno, reg_type_str[reg->type]);
@@ -2744,7 +2756,6 @@ static int do_check(struct bpf_verifier_env *env)
 			if (err)
 				return err;
 
-			reset_reg_range_values(regs, insn->dst_reg);
 			if (BPF_SIZE(insn->code) != BPF_W &&
 			    BPF_SIZE(insn->code) != BPF_DW) {
 				insn_idx++;
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index b7732e557bf9..e7b075819c08 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -3396,6 +3396,52 @@ static struct bpf_test tests[] = {
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
+	{
+		"map element value is preserved across register spilling",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -184),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0),
+			BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr_unpriv = "R0 leaks addr",
+		.result = ACCEPT,
+		.result_unpriv = REJECT,
+	},
+	{
+		"map element value (adjusted) is preserved across register spilling",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0,
+				offsetof(struct test_val, foo)),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -184),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0),
+			BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.result = ACCEPT,
+		.result_unpriv = REJECT,
+	},
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)
-- 
cgit v1.2.3


From 06c1c049721a995dee2829ad13b24aaf5d7c5cce Mon Sep 17 00:00:00 2001
From: Gianluca Borello <g.borello@gmail.com>
Date: Mon, 9 Jan 2017 10:19:49 -0800
Subject: bpf: allow helpers access to variable memory

Currently, helpers that read and write from/to the stack can do so using
a pair of arguments of type ARG_PTR_TO_STACK and ARG_CONST_STACK_SIZE.
ARG_CONST_STACK_SIZE accepts a constant register of type CONST_IMM, so
that the verifier can safely check the memory access. However, requiring
the argument to be a constant can be limiting in some circumstances.

Since the current logic keeps track of the minimum and maximum value of
a register throughout the simulated execution, ARG_CONST_STACK_SIZE can
be changed to also accept an UNKNOWN_VALUE register in case its
boundaries have been set and the range doesn't cause invalid memory
accesses.

One common situation when this is useful:

int len;
char buf[BUFSIZE]; /* BUFSIZE is 128 */

if (some_condition)
	len = 42;
else
	len = 84;

some_helper(..., buf, len & (BUFSIZE - 1));

The compiler can often decide to assign the constant values 42 or 48
into a variable on the stack, instead of keeping it in a register. When
the variable is then read back from stack into the register in order to
be passed to the helper, the verifier will not be able to recognize the
register as constant (the verifier is not currently tracking all
constant writes into memory), and the program won't be valid.

However, by allowing the helper to accept an UNKNOWN_VALUE register,
this program will work because the bitwise AND operation will set the
range of possible values for the UNKNOWN_VALUE register to [0, BUFSIZE),
so the verifier can guarantee the helper call will be safe (assuming the
argument is of type ARG_CONST_STACK_SIZE_OR_ZERO, otherwise one more
check against 0 would be needed). Custom ranges can be set not only with
ALU operations, but also by explicitly comparing the UNKNOWN_VALUE
register with constants.

Another very common example happens when intercepting system call
arguments and accessing user-provided data of variable size using
bpf_probe_read(). One can load at runtime the user-provided length in an
UNKNOWN_VALUE register, and then read that exact amount of data up to a
compile-time determined limit in order to fit into the proper local
storage allocated on the stack, without having to guess a suboptimal
access size at compile time.

Also, in case the helpers accepting the UNKNOWN_VALUE register operate
in raw mode, disable the raw mode so that the program is required to
initialize all memory, since there is no guarantee the helper will fill
it completely, leaving possibilities for data leak (just relevant when
the memory used by the helper is the stack, not when using a pointer to
map element value or packet). In other words, ARG_PTR_TO_RAW_STACK will
be treated as ARG_PTR_TO_STACK.

Signed-off-by: Gianluca Borello <g.borello@gmail.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/verifier.c                       |  74 ++++-
 tools/testing/selftests/bpf/test_verifier.c | 410 ++++++++++++++++++++++++++++
 2 files changed, 474 insertions(+), 10 deletions(-)

(limited to 'tools/testing')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 59ed07b9b4ea..3d4f7bf32aaf 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -980,6 +980,25 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
 	return 0;
 }
 
+static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
+				   int access_size, bool zero_size_allowed,
+				   struct bpf_call_arg_meta *meta)
+{
+	struct bpf_reg_state *regs = env->cur_state.regs;
+
+	switch (regs[regno].type) {
+	case PTR_TO_PACKET:
+		return check_packet_access(env, regno, 0, access_size);
+	case PTR_TO_MAP_VALUE:
+		return check_map_access(env, regno, 0, access_size);
+	case PTR_TO_MAP_VALUE_ADJ:
+		return check_map_access_adj(env, regno, 0, access_size);
+	default: /* const_imm|ptr_to_stack or invalid ptr */
+		return check_stack_boundary(env, regno, access_size,
+					    zero_size_allowed, meta);
+	}
+}
+
 static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 			  enum bpf_arg_type arg_type,
 			  struct bpf_call_arg_meta *meta)
@@ -1018,7 +1037,10 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 	} else if (arg_type == ARG_CONST_STACK_SIZE ||
 		   arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) {
 		expected_type = CONST_IMM;
-		if (type != expected_type)
+		/* One exception. Allow UNKNOWN_VALUE registers when the
+		 * boundaries are known and don't cause unsafe memory accesses
+		 */
+		if (type != UNKNOWN_VALUE && type != expected_type)
 			goto err_type;
 	} else if (arg_type == ARG_CONST_MAP_PTR) {
 		expected_type = CONST_PTR_TO_MAP;
@@ -1099,15 +1121,47 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 			verbose("ARG_CONST_STACK_SIZE cannot be first argument\n");
 			return -EACCES;
 		}
-		if (regs[regno - 1].type == PTR_TO_PACKET)
-			err = check_packet_access(env, regno - 1, 0, reg->imm);
-		else if (regs[regno - 1].type == PTR_TO_MAP_VALUE)
-			err = check_map_access(env, regno - 1, 0, reg->imm);
-		else if (regs[regno - 1].type == PTR_TO_MAP_VALUE_ADJ)
-			err = check_map_access_adj(env, regno - 1, 0, reg->imm);
-		else
-			err = check_stack_boundary(env, regno - 1, reg->imm,
-						   zero_size_allowed, meta);
+
+		/* If the register is UNKNOWN_VALUE, the access check happens
+		 * using its boundaries. Otherwise, just use its imm
+		 */
+		if (type == UNKNOWN_VALUE) {
+			/* For unprivileged variable accesses, disable raw
+			 * mode so that the program is required to
+			 * initialize all the memory that the helper could
+			 * just partially fill up.
+			 */
+			meta = NULL;
+
+			if (reg->min_value < 0) {
+				verbose("R%d min value is negative, either use unsigned or 'var &= const'\n",
+					regno);
+				return -EACCES;
+			}
+
+			if (reg->min_value == 0) {
+				err = check_helper_mem_access(env, regno - 1, 0,
+							      zero_size_allowed,
+							      meta);
+				if (err)
+					return err;
+			}
+
+			if (reg->max_value == BPF_REGISTER_MAX_RANGE) {
+				verbose("R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
+					regno);
+				return -EACCES;
+			}
+			err = check_helper_mem_access(env, regno - 1,
+						      reg->max_value,
+						      zero_size_allowed, meta);
+			if (err)
+				return err;
+		} else {
+			/* register is CONST_IMM */
+			err = check_helper_mem_access(env, regno - 1, reg->imm,
+						      zero_size_allowed, meta);
+		}
 	}
 
 	return err;
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index e7b075819c08..9bb45346dc72 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -3442,6 +3442,416 @@ static struct bpf_test tests[] = {
 		.result = ACCEPT,
 		.result_unpriv = REJECT,
 	},
+	{
+		"helper access to variable memory: stack, bitwise AND + JMP, correct bounds",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+			BPF_MOV64_IMM(BPF_REG_2, 16),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to variable memory: stack, bitwise AND, zero included",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+			BPF_MOV64_IMM(BPF_REG_2, 16),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid stack type R1 off=-64 access_size=0",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to variable memory: stack, bitwise AND + JMP, wrong max",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+			BPF_MOV64_IMM(BPF_REG_2, 16),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 65),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid stack type R1 off=-64 access_size=65",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to variable memory: stack, JMP, correct bounds",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+			BPF_MOV64_IMM(BPF_REG_2, 16),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 4),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to variable memory: stack, JMP (signed), correct bounds",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+			BPF_MOV64_IMM(BPF_REG_2, 16),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, 64, 4),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to variable memory: stack, JMP, bounds + offset",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+			BPF_MOV64_IMM(BPF_REG_2, 16),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 5),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 3),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid stack type R1 off=-64 access_size=65",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to variable memory: stack, JMP, wrong max",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+			BPF_MOV64_IMM(BPF_REG_2, 16),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 65, 4),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid stack type R1 off=-64 access_size=65",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to variable memory: stack, JMP, no max check",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+			BPF_MOV64_IMM(BPF_REG_2, 16),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R2 unbounded memory access",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to variable memory: stack, JMP, no min check",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+			BPF_MOV64_IMM(BPF_REG_2, 16),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 3),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid stack type R1 off=-64 access_size=0",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to variable memory: stack, JMP (signed), no min check",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+			BPF_MOV64_IMM(BPF_REG_2, 16),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, 64, 3),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R2 min value is negative",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to variable memory: map, JMP, correct bounds",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_2,
+				sizeof(struct test_val), 4),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to variable memory: map, JMP, wrong max",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_2,
+				sizeof(struct test_val) + 1, 4),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "invalid access to map value, value_size=48 off=0 size=49",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to variable memory: map adjusted, JMP, correct bounds",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 20),
+			BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_2,
+				sizeof(struct test_val) - 20, 4),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to variable memory: map adjusted, JMP, wrong max",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 20),
+			BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_2,
+				sizeof(struct test_val) - 19, 4),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "R1 min value is outside of the array range",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to variable memory: size > 0 not allowed on NULL",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, 0),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_EMIT_CALL(BPF_FUNC_csum_diff),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 type=imm expected=fp",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to variable memory: size = 0 not allowed on != NULL",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 8),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_EMIT_CALL(BPF_FUNC_csum_diff),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid stack type R1 off=-8 access_size=0",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to variable memory: 8 bytes leak",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid indirect read from stack off -64+32 size 64",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to variable memory: 8 bytes no leak (init memory)",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 32),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 32),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)
-- 
cgit v1.2.3


From f5786313f091a19eff934d1074642906c71c0c66 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Fri, 16 Dec 2016 03:10:34 -0800
Subject: selftests: firmware: only modprobe if driver is missing

No need to load test_firmware if its already there.
Also use a more generic form to recommend what is required
to be built.

Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/testing/selftests/firmware/fw_filesystem.sh | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh
index 5c495ad7958a..c8ccdaa78479 100755
--- a/tools/testing/selftests/firmware/fw_filesystem.sh
+++ b/tools/testing/selftests/firmware/fw_filesystem.sh
@@ -5,9 +5,24 @@
 # know so we can be sure we're not accidentally testing the user helper.
 set -e
 
-modprobe test_firmware
-
 DIR=/sys/devices/virtual/misc/test_firmware
+TEST_DIR=$(dirname $0)
+
+test_modprobe()
+{
+	if [ ! -d $DIR ]; then
+		echo "$0: $DIR not present"
+		echo "You must have the following enabled in your kernel:"
+		cat $TEST_DIR/config
+		exit 1
+	fi
+}
+
+trap "test_modprobe" EXIT
+
+if [ ! -d $DIR ]; then
+	modprobe test_firmware
+fi
 
 # CONFIG_FW_LOADER_USER_HELPER has a sysfs class under /sys/class/firmware/
 # These days no one enables CONFIG_FW_LOADER_USER_HELPER so check for that
-- 
cgit v1.2.3


From 880444e214cfd293a2e8cc4bd3505f7ffa6ce33a Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Fri, 16 Dec 2016 03:10:35 -0800
Subject: selftests: firmware: send expected errors to /dev/null

Error that we expect should not be spilled to stdout.

Without this we get:

./fw_filesystem.sh: line 58: printf: write error: Invalid argument
./fw_filesystem.sh: line 63: printf: write error: No such device
./fw_filesystem.sh: line 69: echo: write error: No such file or directory
./fw_filesystem.sh: filesystem loading works
./fw_filesystem.sh: async filesystem loading works

With it:

./fw_filesystem.sh: filesystem loading works
./fw_filesystem.sh: async filesystem loading works

Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/testing/selftests/firmware/fw_filesystem.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh
index c8ccdaa78479..e35691239350 100755
--- a/tools/testing/selftests/firmware/fw_filesystem.sh
+++ b/tools/testing/selftests/firmware/fw_filesystem.sh
@@ -63,18 +63,18 @@ echo "ABCD0123" >"$FW"
 
 NAME=$(basename "$FW")
 
-if printf '\000' >"$DIR"/trigger_request; then
+if printf '\000' >"$DIR"/trigger_request 2> /dev/null; then
 	echo "$0: empty filename should not succeed" >&2
 	exit 1
 fi
 
-if printf '\000' >"$DIR"/trigger_async_request; then
+if printf '\000' >"$DIR"/trigger_async_request 2> /dev/null; then
 	echo "$0: empty filename should not succeed (async)" >&2
 	exit 1
 fi
 
 # Request a firmware that doesn't exist, it should fail.
-if echo -n "nope-$NAME" >"$DIR"/trigger_request; then
+if echo -n "nope-$NAME" >"$DIR"/trigger_request 2> /dev/null; then
 	echo "$0: firmware shouldn't have loaded" >&2
 	exit 1
 fi
-- 
cgit v1.2.3


From be7b89eef7b7fe647eee071a5bb44d6d853f92eb Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Thu, 5 Jan 2017 14:01:03 -0700
Subject: selftests: gpio add .gitignore for generated files

Add .gitignore for generated files

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/gpio/.gitignore | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 tools/testing/selftests/gpio/.gitignore

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/gpio/.gitignore b/tools/testing/selftests/gpio/.gitignore
new file mode 100644
index 000000000000..7d14f743d1a4
--- /dev/null
+++ b/tools/testing/selftests/gpio/.gitignore
@@ -0,0 +1 @@
+gpio-mockup-chardev
-- 
cgit v1.2.3


From a33f9d8e02edef6c2d9cfdf0b79a743d12468438 Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Thu, 5 Jan 2017 13:58:54 -0700
Subject: selftests: ipc add missing generated file to .gitignore

Add missing generated file msgque to .gitignore

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/ipc/.gitignore | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/ipc/.gitignore b/tools/testing/selftests/ipc/.gitignore
index 84b66a3c1f74..9af04c9353c0 100644
--- a/tools/testing/selftests/ipc/.gitignore
+++ b/tools/testing/selftests/ipc/.gitignore
@@ -1 +1,2 @@
 msgque_test
+msgque
-- 
cgit v1.2.3


From fbb02ed8a14f54d0d07975025757e861e680e3be Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Thu, 5 Jan 2017 14:03:39 -0700
Subject: selftests: x86 protection_keys fix unused variable compile warnings

Fix unused variable compile warnings in protection_keys.c

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/x86/protection_keys.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c
index bdd58c78902e..70402cd96265 100644
--- a/tools/testing/selftests/x86/protection_keys.c
+++ b/tools/testing/selftests/x86/protection_keys.c
@@ -812,8 +812,6 @@ void setup_hugetlbfs(void)
 {
 	int err;
 	int fd;
-	int validated_nr_pages;
-	int i;
 	char buf[] = "123";
 
 	if (geteuid() != 0) {
@@ -1133,7 +1131,6 @@ void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey)
 void test_pkey_syscalls_bad_args(int *ptr, u16 pkey)
 {
 	int err;
-	int bad_flag = (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE) + 1;
 	int bad_pkey = NR_PKEYS+99;
 
 	/* not enforced when pkey_get() is not a syscall
@@ -1149,8 +1146,6 @@ void test_pkey_syscalls_bad_args(int *ptr, u16 pkey)
 /* Assumes that all pkeys other than 'pkey' are unallocated */
 void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
 {
-	unsigned long flags;
-	unsigned long init_val;
 	int err;
 	int allocated_pkeys[NR_PKEYS] = {0};
 	int nr_allocated_pkeys = 0;
-- 
cgit v1.2.3


From 3e91293ffccafe30daa7310526bbfbc287e0c0fd Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Thu, 5 Jan 2017 14:11:00 -0700
Subject: selftests: x86 protection_keys remove dead code

Remove commented out calls to pkey_get().

Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/x86/protection_keys.c | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c
index 70402cd96265..46f53ec502f6 100644
--- a/tools/testing/selftests/x86/protection_keys.c
+++ b/tools/testing/selftests/x86/protection_keys.c
@@ -1114,11 +1114,6 @@ void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey)
 		err = sys_pkey_free(i);
 		pkey_assert(err);
 
-		/* not enforced when pkey_get() is not a syscall
-		err = pkey_get(i, 0);
-		pkey_assert(err < 0);
-		*/
-
 		err = sys_pkey_free(i);
 		pkey_assert(err);
 
@@ -1133,11 +1128,6 @@ void test_pkey_syscalls_bad_args(int *ptr, u16 pkey)
 	int err;
 	int bad_pkey = NR_PKEYS+99;
 
-	/* not enforced when pkey_get() is not a syscall
-	err = pkey_get(bad_pkey, bad_flag);
-	pkey_assert(err < 0);
-	*/
-
 	/* pass a known-invalid pkey in: */
 	err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, bad_pkey);
 	pkey_assert(err);
-- 
cgit v1.2.3


From 62c7989b24dbd348c2507ee6458ebf5637d6ddb5 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 12 Jan 2017 11:51:33 +0100
Subject: bpf: allow b/h/w/dw access for bpf's cb in ctx

When structs are used to store temporary state in cb[] buffer that is
used with programs and among tail calls, then the generated code will
not always access the buffer in bpf_w chunks. We can ease programming
of it and let this act more natural by allowing for aligned b/h/w/dw
sized access for cb[] ctx member. Various test cases are attached as
well for the selftest suite. Potentially, this can also be reused for
other program types to pass data around.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/verifier.c                       |   8 +-
 net/core/filter.c                           |  41 ++-
 tools/testing/selftests/bpf/test_verifier.c | 442 +++++++++++++++++++++++++++-
 3 files changed, 478 insertions(+), 13 deletions(-)

(limited to 'tools/testing')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index df7e47244e75..d60e12c67266 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3165,10 +3165,14 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 	insn = env->prog->insnsi + delta;
 
 	for (i = 0; i < insn_cnt; i++, insn++) {
-		if (insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
+		if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
+		    insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
+		    insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
 		    insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
 			type = BPF_READ;
-		else if (insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
+		else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
+			 insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
+			 insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
 			 insn->code == (BPF_STX | BPF_MEM | BPF_DW))
 			type = BPF_WRITE;
 		else
diff --git a/net/core/filter.c b/net/core/filter.c
index 8cfbdefbfb1c..90383860e224 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2776,11 +2776,33 @@ static bool __is_valid_access(int off, int size)
 {
 	if (off < 0 || off >= sizeof(struct __sk_buff))
 		return false;
+
 	/* The verifier guarantees that size > 0. */
 	if (off % size != 0)
 		return false;
-	if (size != sizeof(__u32))
-		return false;
+
+	switch (off) {
+	case offsetof(struct __sk_buff, cb[0]) ...
+	     offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
+		if (size == sizeof(__u16) &&
+		    off > offsetof(struct __sk_buff, cb[4]) + sizeof(__u16))
+			return false;
+		if (size == sizeof(__u32) &&
+		    off > offsetof(struct __sk_buff, cb[4]))
+			return false;
+		if (size == sizeof(__u64) &&
+		    off > offsetof(struct __sk_buff, cb[2]))
+			return false;
+		if (size != sizeof(__u8)  &&
+		    size != sizeof(__u16) &&
+		    size != sizeof(__u32) &&
+		    size != sizeof(__u64))
+			return false;
+		break;
+	default:
+		if (size != sizeof(__u32))
+			return false;
+	}
 
 	return true;
 }
@@ -2799,7 +2821,7 @@ static bool sk_filter_is_valid_access(int off, int size,
 	if (type == BPF_WRITE) {
 		switch (off) {
 		case offsetof(struct __sk_buff, cb[0]) ...
-		     offsetof(struct __sk_buff, cb[4]):
+		     offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
 			break;
 		default:
 			return false;
@@ -2823,7 +2845,7 @@ static bool lwt_is_valid_access(int off, int size,
 		case offsetof(struct __sk_buff, mark):
 		case offsetof(struct __sk_buff, priority):
 		case offsetof(struct __sk_buff, cb[0]) ...
-		     offsetof(struct __sk_buff, cb[4]):
+		     offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
 			break;
 		default:
 			return false;
@@ -2915,7 +2937,7 @@ static bool tc_cls_act_is_valid_access(int off, int size,
 		case offsetof(struct __sk_buff, tc_index):
 		case offsetof(struct __sk_buff, priority):
 		case offsetof(struct __sk_buff, cb[0]) ...
-		     offsetof(struct __sk_buff, cb[4]):
+		     offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
 		case offsetof(struct __sk_buff, tc_classid):
 			break;
 		default:
@@ -3066,8 +3088,11 @@ static u32 sk_filter_convert_ctx_access(enum bpf_access_type type,
 					  si->dst_reg, si->src_reg, insn);
 
 	case offsetof(struct __sk_buff, cb[0]) ...
-	     offsetof(struct __sk_buff, cb[4]):
+	     offsetof(struct __sk_buff, cb[4]) + sizeof(__u32) - 1:
 		BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20);
+		BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
+			      offsetof(struct qdisc_skb_cb, data)) %
+			     sizeof(__u64));
 
 		prog->cb_access = 1;
 		off  = si->off;
@@ -3075,10 +3100,10 @@ static u32 sk_filter_convert_ctx_access(enum bpf_access_type type,
 		off += offsetof(struct sk_buff, cb);
 		off += offsetof(struct qdisc_skb_cb, data);
 		if (type == BPF_WRITE)
-			*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg,
+			*insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg,
 					      si->src_reg, off);
 		else
-			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg,
+			*insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg,
 					      si->src_reg, off);
 		break;
 
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 9bb45346dc72..1aa73241c999 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -859,15 +859,451 @@ static struct bpf_test tests[] = {
 		.result = REJECT,
 	},
 	{
-		"check non-u32 access to cb",
+		"check cb access: byte",
 		.insns = {
-			BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_1,
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[0]) + 1),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[0]) + 2),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[0]) + 3),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[1])),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[1]) + 1),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[1]) + 2),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[1]) + 3),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[2])),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[2]) + 1),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[2]) + 2),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[2]) + 3),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[3])),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[3]) + 1),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[3]) + 2),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[3]) + 3),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[4])),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[4]) + 1),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[4]) + 2),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[4]) + 3),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[0]) + 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[0]) + 2),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[0]) + 3),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[1])),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[1]) + 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[1]) + 2),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[1]) + 3),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[2])),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[2]) + 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[2]) + 2),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[2]) + 3),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[3])),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[3]) + 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[3]) + 2),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[3]) + 3),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[4])),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[4]) + 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[4]) + 2),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[4]) + 3),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"check cb access: byte, oob 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[4]) + 4),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"check cb access: byte, oob 2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[0]) - 1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"check cb access: byte, oob 3",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[4]) + 4),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"check cb access: byte, oob 4",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[0]) - 1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"check cb access: byte, wrong type",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+	},
+	{
+		"check cb access: half",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[0]) + 2),
+			BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[1])),
+			BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[1]) + 2),
+			BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[2])),
+			BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[2]) + 2),
+			BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[3])),
+			BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[3]) + 2),
+			BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[4])),
+			BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[4]) + 2),
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[0]) + 2),
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[1])),
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[1]) + 2),
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[2])),
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[2]) + 2),
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[3])),
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[3]) + 2),
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[4])),
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[4]) + 2),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"check cb access: half, unaligned",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[0]) + 1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "misaligned access",
+		.result = REJECT,
+	},
+	{
+		"check cb access: half, oob 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[4]) + 4),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"check cb access: half, oob 2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[0]) - 2),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"check cb access: half, oob 3",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[4]) + 4),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"check cb access: half, oob 4",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[0]) - 2),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"check cb access: half, wrong type",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+	},
+	{
+		"check cb access: word",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[1])),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[2])),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[3])),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[4])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[1])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[2])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[3])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[4])),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"check cb access: word, unaligned 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[0]) + 2),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "misaligned access",
+		.result = REJECT,
+	},
+	{
+		"check cb access: word, unaligned 2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[4]) + 1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "misaligned access",
+		.result = REJECT,
+	},
+	{
+		"check cb access: word, unaligned 3",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[4]) + 2),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "misaligned access",
+		.result = REJECT,
+	},
+	{
+		"check cb access: word, unaligned 4",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[4]) + 3),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "misaligned access",
+		.result = REJECT,
+	},
+	{
+		"check cb access: double",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[2])),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[2])),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"check cb access: double, unaligned 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[1])),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "misaligned access",
+		.result = REJECT,
+	},
+	{
+		"check cb access: double, unaligned 2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[3])),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "misaligned access",
+		.result = REJECT,
+	},
+	{
+		"check cb access: double, oob 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[4])),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"check cb access: double, oob 2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[4]) + 8),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"check cb access: double, oob 3",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[0]) - 8),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"check cb access: double, oob 4",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[4])),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"check cb access: double, oob 5",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[4]) + 8),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"check cb access: double, oob 6",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[0]) - 8),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"check cb access: double, wrong type",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
 				    offsetof(struct __sk_buff, cb[0])),
 			BPF_EXIT_INSN(),
 		},
 		.errstr = "invalid bpf_context access",
-		.errstr_unpriv = "R1 leaks addr",
 		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
 	},
 	{
 		"check out of range skb->cb access",
-- 
cgit v1.2.3


From 4d7b9dc1f36a99423a6171d393040165fb135530 Mon Sep 17 00:00:00 2001
From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Thu, 12 Jan 2017 05:10:11 -0800
Subject: tools: psock_lib: harden socket filter used by psock tests

The filter added by sock_setfilter is intended to only permit
packets matching the pattern set up by create_payload(), but
we only check the ip_len, and a single test-character in
the IP packet to ensure this condition.

Harden the filter by adding additional constraints so that we only
permit UDP/IPv4 packets that meet the ip_len and test-character
requirements. Include the bpf_asm src as a comment, in case this
needs to be enhanced in the future

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/psock_lib.h | 39 +++++++++++++++++++++++++++------
 1 file changed, 32 insertions(+), 7 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/net/psock_lib.h b/tools/testing/selftests/net/psock_lib.h
index 24bc7ec1be7d..a77da88bf946 100644
--- a/tools/testing/selftests/net/psock_lib.h
+++ b/tools/testing/selftests/net/psock_lib.h
@@ -40,14 +40,39 @@
 
 static __maybe_unused void sock_setfilter(int fd, int lvl, int optnum)
 {
+	/* the filter below checks for all of the following conditions that
+	 * are based on the contents of create_payload()
+	 *  ether type 0x800 and
+	 *  ip proto udp     and
+	 *  skb->len == DATA_LEN and
+	 *  udp[38] == 'a' or udp[38] == 'b'
+	 * It can be generated from the following bpf_asm input:
+	 *	ldh [12]
+	 *	jne #0x800, drop	; ETH_P_IP
+	 *	ldb [23]
+	 *	jneq #17, drop		; IPPROTO_UDP
+	 *	ld len			; ld skb->len
+	 *	jlt #100, drop		; DATA_LEN
+	 *	ldb [80]
+	 *	jeq #97, pass		; DATA_CHAR
+	 *	jne #98, drop		; DATA_CHAR_1
+	 *	pass:
+	 *	  ret #-1
+	 *	drop:
+	 *	  ret #0
+	 */
 	struct sock_filter bpf_filter[] = {
-		{ 0x80, 0, 0, 0x00000000 },  /* LD  pktlen		      */
-		{ 0x35, 0, 4, DATA_LEN   },  /* JGE DATA_LEN  [f goto nomatch]*/
-		{ 0x30, 0, 0, 0x00000050 },  /* LD  ip[80]		      */
-		{ 0x15, 1, 0, DATA_CHAR  },  /* JEQ DATA_CHAR   [t goto match]*/
-		{ 0x15, 0, 1, DATA_CHAR_1},  /* JEQ DATA_CHAR_1 [t goto match]*/
-		{ 0x06, 0, 0, 0x00000060 },  /* RET match	              */
-		{ 0x06, 0, 0, 0x00000000 },  /* RET no match		      */
+		{ 0x28,  0,  0, 0x0000000c },
+		{ 0x15,  0,  8, 0x00000800 },
+		{ 0x30,  0,  0, 0x00000017 },
+		{ 0x15,  0,  6, 0x00000011 },
+		{ 0x80,  0,  0, 0000000000 },
+		{ 0x35,  0,  4, 0x00000064 },
+		{ 0x30,  0,  0, 0x00000050 },
+		{ 0x15,  1,  0, 0x00000061 },
+		{ 0x15,  0,  1, 0x00000062 },
+		{ 0x06,  0,  0, 0xffffffff },
+		{ 0x06,  0,  0, 0000000000 },
 	};
 	struct sock_fprog bpf_prog;
 
-- 
cgit v1.2.3


From 0186a6cbdc6287fde65858e5d9c714dc167b8ace Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 1 Dec 2016 11:47:05 +0000
Subject: locking/ww_mutex: Add ww_mutex to locktorture test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Although ww_mutexes degenerate into mutexes, it would be useful to
torture the deadlock handling between multiple ww_mutexes in addition to
torturing the regular mutexes.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Maarten Lankhorst <dev@mblankhorst.nl>
Cc: Nicolai Hähnle <nhaehnle@gmail.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20161201114711.28697-3-chris@chris-wilson.co.uk
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/locking/locktorture.c                       | 73 ++++++++++++++++++++++
 .../selftests/rcutorture/configs/lock/CFLIST       |  1 +
 .../selftests/rcutorture/configs/lock/LOCK07       |  6 ++
 .../selftests/rcutorture/configs/lock/LOCK07.boot  |  1 +
 4 files changed, 81 insertions(+)
 create mode 100644 tools/testing/selftests/rcutorture/configs/lock/LOCK07
 create mode 100644 tools/testing/selftests/rcutorture/configs/lock/LOCK07.boot

(limited to 'tools/testing')

diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index f8c5af52a131..9bffedd82884 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -372,6 +372,78 @@ static struct lock_torture_ops mutex_lock_ops = {
 	.name		= "mutex_lock"
 };
 
+#include <linux/ww_mutex.h>
+static DEFINE_WW_CLASS(torture_ww_class);
+static DEFINE_WW_MUTEX(torture_ww_mutex_0, &torture_ww_class);
+static DEFINE_WW_MUTEX(torture_ww_mutex_1, &torture_ww_class);
+static DEFINE_WW_MUTEX(torture_ww_mutex_2, &torture_ww_class);
+
+static int torture_ww_mutex_lock(void)
+__acquires(torture_ww_mutex_0)
+__acquires(torture_ww_mutex_1)
+__acquires(torture_ww_mutex_2)
+{
+	LIST_HEAD(list);
+	struct reorder_lock {
+		struct list_head link;
+		struct ww_mutex *lock;
+	} locks[3], *ll, *ln;
+	struct ww_acquire_ctx ctx;
+
+	locks[0].lock = &torture_ww_mutex_0;
+	list_add(&locks[0].link, &list);
+
+	locks[1].lock = &torture_ww_mutex_1;
+	list_add(&locks[1].link, &list);
+
+	locks[2].lock = &torture_ww_mutex_2;
+	list_add(&locks[2].link, &list);
+
+	ww_acquire_init(&ctx, &torture_ww_class);
+
+	list_for_each_entry(ll, &list, link) {
+		int err;
+
+		err = ww_mutex_lock(ll->lock, &ctx);
+		if (!err)
+			continue;
+
+		ln = ll;
+		list_for_each_entry_continue_reverse(ln, &list, link)
+			ww_mutex_unlock(ln->lock);
+
+		if (err != -EDEADLK)
+			return err;
+
+		ww_mutex_lock_slow(ll->lock, &ctx);
+		list_move(&ll->link, &list);
+	}
+
+	ww_acquire_fini(&ctx);
+	return 0;
+}
+
+static void torture_ww_mutex_unlock(void)
+__releases(torture_ww_mutex_0)
+__releases(torture_ww_mutex_1)
+__releases(torture_ww_mutex_2)
+{
+	ww_mutex_unlock(&torture_ww_mutex_0);
+	ww_mutex_unlock(&torture_ww_mutex_1);
+	ww_mutex_unlock(&torture_ww_mutex_2);
+}
+
+static struct lock_torture_ops ww_mutex_lock_ops = {
+	.writelock	= torture_ww_mutex_lock,
+	.write_delay	= torture_mutex_delay,
+	.task_boost     = torture_boost_dummy,
+	.writeunlock	= torture_ww_mutex_unlock,
+	.readlock       = NULL,
+	.read_delay     = NULL,
+	.readunlock     = NULL,
+	.name		= "ww_mutex_lock"
+};
+
 #ifdef CONFIG_RT_MUTEXES
 static DEFINE_RT_MUTEX(torture_rtmutex);
 
@@ -793,6 +865,7 @@ static int __init lock_torture_init(void)
 		&spin_lock_ops, &spin_lock_irq_ops,
 		&rw_lock_ops, &rw_lock_irq_ops,
 		&mutex_lock_ops,
+		&ww_mutex_lock_ops,
 #ifdef CONFIG_RT_MUTEXES
 		&rtmutex_lock_ops,
 #endif
diff --git a/tools/testing/selftests/rcutorture/configs/lock/CFLIST b/tools/testing/selftests/rcutorture/configs/lock/CFLIST
index b9611c523723..41bae5824339 100644
--- a/tools/testing/selftests/rcutorture/configs/lock/CFLIST
+++ b/tools/testing/selftests/rcutorture/configs/lock/CFLIST
@@ -4,3 +4,4 @@ LOCK03
 LOCK04
 LOCK05
 LOCK06
+LOCK07
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK07 b/tools/testing/selftests/rcutorture/configs/lock/LOCK07
new file mode 100644
index 000000000000..1d1da1477fc3
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK07
@@ -0,0 +1,6 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK07.boot b/tools/testing/selftests/rcutorture/configs/lock/LOCK07.boot
new file mode 100644
index 000000000000..97dadd1a9e45
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK07.boot
@@ -0,0 +1 @@
+locktorture.torture_type=ww_mutex_lock
-- 
cgit v1.2.3


From 2b0b211134a65401ed874ce0d5d48844f4f6f341 Mon Sep 17 00:00:00 2001
From: Chris Wilson <chris@chris-wilson.co.uk>
Date: Thu, 1 Dec 2016 11:47:11 +0000
Subject: locking/ww_mutex: Add ww_mutex to tools/testing/selftests

Add the minimal test running (modprobe test-ww_mutex) to the kselftests
CI framework.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20161201114711.28697-9-chris@chris-wilson.co.uk
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/testing/selftests/locking/ww_mutex.sh | 10 ++++++++++
 1 file changed, 10 insertions(+)
 create mode 100644 tools/testing/selftests/locking/ww_mutex.sh

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/locking/ww_mutex.sh b/tools/testing/selftests/locking/ww_mutex.sh
new file mode 100644
index 000000000000..6905da965f3b
--- /dev/null
+++ b/tools/testing/selftests/locking/ww_mutex.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+# Runs API tests for struct ww_mutex (Wait/Wound mutexes)
+
+if /sbin/modprobe -q test-ww_mutex; then
+       /sbin/modprobe -q -r test-ww_mutex
+       echo "locking/ww_mutex: ok"
+else
+       echo "locking/ww_mutex: [FAIL]"
+       exit 1
+fi
-- 
cgit v1.2.3


From 963447e6b5390e4e3ee4f5a02528ff4467e94614 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 8 Nov 2016 16:29:18 -0800
Subject: torture: Add a check for CONFIG_RCU_STALL_COMMON for TINY01

This commit verifies coverage of testing with CONFIG_RCU_STALL_COMMON=n.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 tools/testing/selftests/rcutorture/configs/rcu/TINY01 | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY01 b/tools/testing/selftests/rcutorture/configs/rcu/TINY01
index 0a63e073a00c..6db705e55487 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TINY01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY01
@@ -7,6 +7,7 @@ CONFIG_HZ_PERIODIC=n
 CONFIG_NO_HZ_IDLE=y
 CONFIG_NO_HZ_FULL=n
 CONFIG_RCU_TRACE=n
+#CHECK#CONFIG_RCU_STALL_COMMON=n
 CONFIG_DEBUG_LOCK_ALLOC=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_PREEMPT_COUNT=n
-- 
cgit v1.2.3


From 4faf185cee66a3d1384a687406c80a9dc4124e3c Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 8 Nov 2016 16:30:25 -0800
Subject: torture: Add CONFIG_PROVE_RCU_REPEATEDLY=y for TINY02

This commit adds CONFIG_PROVE_RCU_REPEATEDLY=y, which has been untested
for quite some time.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 tools/testing/selftests/rcutorture/configs/rcu/TINY02 | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY02 b/tools/testing/selftests/rcutorture/configs/rcu/TINY02
index f1892e0371c9..3a05bac2ef29 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TINY02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY02
@@ -8,6 +8,7 @@ CONFIG_NO_HZ_IDLE=n
 CONFIG_NO_HZ_FULL=n
 CONFIG_RCU_TRACE=y
 CONFIG_PROVE_LOCKING=y
+CONFIG_PROVE_RCU_REPEATEDLY=y
 #CHECK#CONFIG_PROVE_RCU=y
 CONFIG_DEBUG_LOCK_ALLOC=y
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
-- 
cgit v1.2.3


From cdfc7f1ab5ce5534b9ba8e2b11d393a43b0f11bf Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 8 Nov 2016 16:31:51 -0800
Subject: torture: Add tests without slow grace period setup/cleanup

This commit moves CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP,
CONFIG_RCU_TORTURE_TEST_SLOW_INIT, and CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT
from CFcommon to all of the TREE scenarios other than TREE08 and TREE09
in order to do at least some testing without these Kconfig options set.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 tools/testing/selftests/rcutorture/configs/rcu/CFcommon | 3 ---
 tools/testing/selftests/rcutorture/configs/rcu/TREE01   | 3 +++
 tools/testing/selftests/rcutorture/configs/rcu/TREE02   | 3 +++
 tools/testing/selftests/rcutorture/configs/rcu/TREE03   | 3 +++
 tools/testing/selftests/rcutorture/configs/rcu/TREE04   | 3 +++
 tools/testing/selftests/rcutorture/configs/rcu/TREE05   | 3 +++
 tools/testing/selftests/rcutorture/configs/rcu/TREE06   | 3 +++
 tools/testing/selftests/rcutorture/configs/rcu/TREE07   | 3 +++
 8 files changed, 21 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon
index f824b4c9d9d9..d2d2a86139db 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon
+++ b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon
@@ -1,5 +1,2 @@
 CONFIG_RCU_TORTURE_TEST=y
 CONFIG_PRINTK_TIME=y
-CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
-CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
-CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 b/tools/testing/selftests/rcutorture/configs/rcu/TREE01
index f572b873c620..359cb258f639 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01
@@ -16,3 +16,6 @@ CONFIG_DEBUG_LOCK_ALLOC=n
 CONFIG_RCU_BOOST=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_RCU_EXPERT=y
+CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
+CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
+CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02 b/tools/testing/selftests/rcutorture/configs/rcu/TREE02
index ef6a22c44dea..638150c777ed 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE02
@@ -20,3 +20,6 @@ CONFIG_PROVE_LOCKING=n
 CONFIG_RCU_BOOST=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_RCU_EXPERT=y
+CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
+CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
+CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03 b/tools/testing/selftests/rcutorture/configs/rcu/TREE03
index 7a17c503b382..3b93ee544e70 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03
@@ -17,3 +17,6 @@ CONFIG_RCU_BOOST=y
 CONFIG_RCU_KTHREAD_PRIO=2
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_RCU_EXPERT=y
+CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
+CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
+CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
index 17cbe098b115..a5fa5042fb7a 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
@@ -19,3 +19,6 @@ CONFIG_RCU_NOCB_CPU=n
 CONFIG_DEBUG_LOCK_ALLOC=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_RCU_EXPERT=y
+CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
+CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
+CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 b/tools/testing/selftests/rcutorture/configs/rcu/TREE05
index 1257d3227b1e..d4cdc0d74e16 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05
@@ -19,3 +19,6 @@ CONFIG_PROVE_LOCKING=y
 #CHECK#CONFIG_PROVE_RCU=y
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_RCU_EXPERT=y
+CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
+CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
+CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 b/tools/testing/selftests/rcutorture/configs/rcu/TREE06
index d3e456b74cbe..4cb02bd28f08 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE06
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06
@@ -20,3 +20,6 @@ CONFIG_PROVE_LOCKING=y
 #CHECK#CONFIG_PROVE_RCU=y
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
 CONFIG_RCU_EXPERT=y
+CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
+CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
+CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
index 3956b4131f72..b12a3ea1867e 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
@@ -19,3 +19,6 @@ CONFIG_RCU_NOCB_CPU=n
 CONFIG_DEBUG_LOCK_ALLOC=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_RCU_EXPERT=y
+CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
+CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
+CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
-- 
cgit v1.2.3


From ad4b594b5d71d435ce1f8040416d993bfd83c273 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 8 Nov 2016 16:35:03 -0800
Subject: torture: Run at least one test with CONFIG_DEBUG_OBJECTS_RCU_HEAD

This commit enables the CONFIG_DEBUG_OBJECTS_RCU_HEAD Kconfig option
in TREE02 in order to do at least some testing with this enabled.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 tools/testing/selftests/rcutorture/configs/rcu/TREE02 | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02 b/tools/testing/selftests/rcutorture/configs/rcu/TREE02
index 638150c777ed..c1ab5926568b 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE02
@@ -23,3 +23,4 @@ CONFIG_RCU_EXPERT=y
 CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
 CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
 CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
-- 
cgit v1.2.3


From 24e2f4de665a54d639e9e30e0806e45669742a01 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 8 Nov 2016 16:40:57 -0800
Subject: torture: Run one test with DEBUG_LOCK_ALLOC but not PROVE_LOCKING

This commit sets CONFIG_DEBUG_LOCK_ALLOC but not CONFIG_PROVE_LOCKING
for TREE08 in order to have at least one test with this configuration.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 tools/testing/selftests/rcutorture/configs/rcu/TREE08 | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08 b/tools/testing/selftests/rcutorture/configs/rcu/TREE08
index bb9b0c1a23c2..22181eb390fc 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08
@@ -17,8 +17,7 @@ CONFIG_RCU_FANOUT_LEAF=2
 CONFIG_RCU_NOCB_CPU=y
 CONFIG_RCU_NOCB_CPU_ALL=y
 CONFIG_DEBUG_LOCK_ALLOC=n
-CONFIG_PROVE_LOCKING=y
-#CHECK#CONFIG_PROVE_RCU=y
+CONFIG_PROVE_LOCKING=n
 CONFIG_RCU_BOOST=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_RCU_EXPERT=y
-- 
cgit v1.2.3


From c31db96f56bf11eb37cdbd38162c2391dc9b3d32 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 8 Nov 2016 16:51:44 -0800
Subject: torture: Run a couple scenarios with CONFIG_RCU_EQS_DEBUG

This commit runs TREE04 and TREE08 with CONFIG_RCU_EQS_DEBUG=y,
enabling dyntick-counter checking on those two tests.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 tools/testing/selftests/rcutorture/configs/rcu/TREE04 | 1 +
 tools/testing/selftests/rcutorture/configs/rcu/TREE08 | 1 +
 2 files changed, 2 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
index a5fa5042fb7a..5af758e783c7 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
@@ -22,3 +22,4 @@ CONFIG_RCU_EXPERT=y
 CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
 CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
 CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
+CONFIG_RCU_EQS_DEBUG=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08 b/tools/testing/selftests/rcutorture/configs/rcu/TREE08
index 22181eb390fc..099cc63c6a3b 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08
@@ -21,3 +21,4 @@ CONFIG_PROVE_LOCKING=n
 CONFIG_RCU_BOOST=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_RCU_EXPERT=y
+CONFIG_RCU_EQS_DEBUG=y
-- 
cgit v1.2.3


From b1c14a39adcf2237f31bcd0cd858bed5c54f2aee Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 8 Nov 2016 16:54:08 -0800
Subject: torture: Update RCU test scenario documentation

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 .../selftests/rcutorture/doc/TREE_RCU-kconfig.txt  | 33 ++++++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
index 4e2b1893d40d..364801b1a230 100644
--- a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
+++ b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
@@ -14,6 +14,7 @@ CONFIG_NO_HZ_FULL_SYSIDLE -- Do one.
 CONFIG_PREEMPT -- Do half.  (First three and #8.)
 CONFIG_PROVE_LOCKING -- Do several, covering CONFIG_DEBUG_LOCK_ALLOC=y and not.
 CONFIG_PROVE_RCU -- Hardwired to CONFIG_PROVE_LOCKING.
+CONFIG_PROVE_RCU_REPEATEDLY -- Do one.
 CONFIG_RCU_BOOST -- one of PREEMPT_RCU.
 CONFIG_RCU_KTHREAD_PRIO -- set to 2 for _BOOST testing.
 CONFIG_RCU_FANOUT -- Cover hierarchy, but overlap with others.
@@ -25,7 +26,12 @@ CONFIG_RCU_NOCB_CPU_NONE -- Do one.
 CONFIG_RCU_NOCB_CPU_ZERO -- Do one.
 CONFIG_RCU_TRACE -- Do half.
 CONFIG_SMP -- Need one !SMP for PREEMPT_RCU.
-!RCU_EXPERT -- Do a few, but these have to be vanilla configurations.
+CONFIG_RCU_EXPERT=n -- Do a few, but these have to be vanilla configurations.
+CONFIG_RCU_EQS_DEBUG -- Do at least one for CONFIG_NO_HZ_FULL and not.
+CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP -- Do for all but a couple TREE scenarios.
+CONFIG_RCU_TORTURE_TEST_SLOW_INIT -- Do for all but a couple TREE scenarios.
+CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT -- Do for all but a couple TREE scenarios.
+
 RCU-bh: Do one with PREEMPT and one with !PREEMPT.
 RCU-sched: Do one with PREEMPT but not BOOST.
 
@@ -72,7 +78,30 @@ CONFIG_RCU_TORTURE_TEST_RUNNABLE
 
 	Always used in KVM testing.
 
+CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT_DELAY
+CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY
+CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP_DELAY
+
+	Inspection suffices, ignore.
+
 CONFIG_PREEMPT_RCU
 CONFIG_TREE_RCU
+CONFIG_TINY_RCU
+
+	These are controlled by CONFIG_PREEMPT and/or CONFIG_SMP.
+
+CONFIG_SPARSE_RCU_POINTER
+
+	Makes sense only for sparse runs, not for kernel builds.
+
+CONFIG_SRCU
+CONFIG_TASKS_RCU
+
+	Selected by CONFIG_RCU_TORTURE_TEST, so cannot disable.
+
+CONFIG_RCU_TRACE
+
+	Implied by CONFIG_RCU_TRACE for Tree RCU.
+
 
-	These are controlled by CONFIG_PREEMPT.
+boot parameters ignored: TBD
-- 
cgit v1.2.3


From 7d025948e4982ee3fa741c0fa56385c8b4a7072d Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 17 Nov 2016 12:54:39 -0800
Subject: torture: Enable DEBUG_OBJECTS_RCU_HEAD for Tiny RCU

The RCU torture tests currently do not run any Tiny RCU scenarios for
CONFIG_DEBUG_OBJECTS_RCU_HEAD=y.  This is a hole in the test, given
that someone might need this in real life and given that Tiny RCU uses
different callback-handling code than does Tree RCU.  This commit
therefore enables this Kconfig option for scenario TINY02.

Reported-by: "Ahmed, Iftekhar" <ahmedi@oregonstate.edu>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 tools/testing/selftests/rcutorture/configs/rcu/TINY02 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY02 b/tools/testing/selftests/rcutorture/configs/rcu/TINY02
index 3a05bac2ef29..a59f7686e219 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TINY02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY02
@@ -11,5 +11,5 @@ CONFIG_PROVE_LOCKING=y
 CONFIG_PROVE_RCU_REPEATEDLY=y
 #CHECK#CONFIG_PROVE_RCU=y
 CONFIG_DEBUG_LOCK_ALLOC=y
-CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
 CONFIG_PREEMPT_COUNT=y
-- 
cgit v1.2.3


From 3fbfadce6012e7bb384b2e9ad47869d5177f7209 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Mon, 16 Jan 2017 22:17:29 -0800
Subject: bpf: Fix test_lru_sanity5() in test_lru_map.c

test_lru_sanity5() fails when the number of online cpus
is fewer than the number of possible cpus.  It can be
reproduced with qemu by using cmd args "--smp cpus=2,maxcpus=8".

The problem is the loop in test_lru_sanity5() is testing
'i' which is incorrect.

This patch:
1. Make sched_next_online() always return -1 if it cannot
   find a next cpu to schedule the process.
2. In test_lru_sanity5(), the parent process does
   sched_setaffinity() first (through sched_next_online())
   and the forked process will inherit it according to
   the 'man sched_setaffinity'.

Fixes: 5db58faf989f ("bpf: Add tests for the LRU bpf_htab")
Reported-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/test_lru_map.c | 53 +++++++++++++++---------------
 1 file changed, 27 insertions(+), 26 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index b13fed534d76..9f7bd1915c21 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -67,21 +67,23 @@ static int map_equal(int lru_map, int expected)
 	return map_subset(lru_map, expected) && map_subset(expected, lru_map);
 }
 
-static int sched_next_online(int pid, int next_to_try)
+static int sched_next_online(int pid, int *next_to_try)
 {
 	cpu_set_t cpuset;
+	int next = *next_to_try;
+	int ret = -1;
 
-	if (next_to_try == nr_cpus)
-		return -1;
-
-	while (next_to_try < nr_cpus) {
+	while (next < nr_cpus) {
 		CPU_ZERO(&cpuset);
-		CPU_SET(next_to_try++, &cpuset);
-		if (!sched_setaffinity(pid, sizeof(cpuset), &cpuset))
+		CPU_SET(next++, &cpuset);
+		if (!sched_setaffinity(pid, sizeof(cpuset), &cpuset)) {
+			ret = 0;
 			break;
+		}
 	}
 
-	return next_to_try;
+	*next_to_try = next;
+	return ret;
 }
 
 /* Size of the LRU amp is 2
@@ -96,11 +98,12 @@ static void test_lru_sanity0(int map_type, int map_flags)
 {
 	unsigned long long key, value[nr_cpus];
 	int lru_map_fd, expected_map_fd;
+	int next_cpu = 0;
 
 	printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
 	       map_flags);
 
-	assert(sched_next_online(0, 0) != -1);
+	assert(sched_next_online(0, &next_cpu) != -1);
 
 	if (map_flags & BPF_F_NO_COMMON_LRU)
 		lru_map_fd = create_map(map_type, map_flags, 2 * nr_cpus);
@@ -183,6 +186,7 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
 	int lru_map_fd, expected_map_fd;
 	unsigned int batch_size;
 	unsigned int map_size;
+	int next_cpu = 0;
 
 	if (map_flags & BPF_F_NO_COMMON_LRU)
 		/* Ther percpu lru list (i.e each cpu has its own LRU
@@ -196,7 +200,7 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
 	printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
 	       map_flags);
 
-	assert(sched_next_online(0, 0) != -1);
+	assert(sched_next_online(0, &next_cpu) != -1);
 
 	batch_size = tgt_free / 2;
 	assert(batch_size * 2 == tgt_free);
@@ -262,6 +266,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
 	int lru_map_fd, expected_map_fd;
 	unsigned int batch_size;
 	unsigned int map_size;
+	int next_cpu = 0;
 
 	if (map_flags & BPF_F_NO_COMMON_LRU)
 		/* Ther percpu lru list (i.e each cpu has its own LRU
@@ -275,7 +280,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
 	printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
 	       map_flags);
 
-	assert(sched_next_online(0, 0) != -1);
+	assert(sched_next_online(0, &next_cpu) != -1);
 
 	batch_size = tgt_free / 2;
 	assert(batch_size * 2 == tgt_free);
@@ -370,11 +375,12 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free)
 	int lru_map_fd, expected_map_fd;
 	unsigned int batch_size;
 	unsigned int map_size;
+	int next_cpu = 0;
 
 	printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
 	       map_flags);
 
-	assert(sched_next_online(0, 0) != -1);
+	assert(sched_next_online(0, &next_cpu) != -1);
 
 	batch_size = tgt_free / 2;
 	assert(batch_size * 2 == tgt_free);
@@ -430,11 +436,12 @@ static void test_lru_sanity4(int map_type, int map_flags, unsigned int tgt_free)
 	int lru_map_fd, expected_map_fd;
 	unsigned long long key, value[nr_cpus];
 	unsigned long long end_key;
+	int next_cpu = 0;
 
 	printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
 	       map_flags);
 
-	assert(sched_next_online(0, 0) != -1);
+	assert(sched_next_online(0, &next_cpu) != -1);
 
 	if (map_flags & BPF_F_NO_COMMON_LRU)
 		lru_map_fd = create_map(map_type, map_flags,
@@ -502,9 +509,8 @@ static void do_test_lru_sanity5(unsigned long long last_key, int map_fd)
 static void test_lru_sanity5(int map_type, int map_flags)
 {
 	unsigned long long key, value[nr_cpus];
-	int next_sched_cpu = 0;
+	int next_cpu = 0;
 	int map_fd;
-	int i;
 
 	if (map_flags & BPF_F_NO_COMMON_LRU)
 		return;
@@ -519,27 +525,20 @@ static void test_lru_sanity5(int map_type, int map_flags)
 	key = 0;
 	assert(!bpf_map_update(map_fd, &key, value, BPF_NOEXIST));
 
-	for (i = 0; i < nr_cpus; i++) {
+	while (sched_next_online(0, &next_cpu) != -1) {
 		pid_t pid;
 
 		pid = fork();
 		if (pid == 0) {
-			next_sched_cpu = sched_next_online(0, next_sched_cpu);
-			if (next_sched_cpu != -1)
-				do_test_lru_sanity5(key, map_fd);
+			do_test_lru_sanity5(key, map_fd);
 			exit(0);
 		} else if (pid == -1) {
-			printf("couldn't spawn #%d process\n", i);
+			printf("couldn't spawn process to test key:%llu\n",
+			       key);
 			exit(1);
 		} else {
 			int status;
 
-			/* It is mostly redundant and just allow the parent
-			 * process to update next_shced_cpu for the next child
-			 * process
-			 */
-			next_sched_cpu = sched_next_online(pid, next_sched_cpu);
-
 			assert(waitpid(pid, &status, 0) == pid);
 			assert(status == 0);
 			key++;
@@ -547,6 +546,8 @@ static void test_lru_sanity5(int map_type, int map_flags)
 	}
 
 	close(map_fd);
+	/* At least one key should be tested */
+	assert(key > 0);
 
 	printf("Pass\n");
 }
-- 
cgit v1.2.3


From 083f345151938706d33969b37b92e7e13ab02b3e Mon Sep 17 00:00:00 2001
From: Stafford Horne <shorne@gmail.com>
Date: Thu, 12 Jan 2017 00:11:14 +0900
Subject: selftests/futex: Add stdio used for logging

Fix missing printf compile warnings.

Signed-off-by: Stafford Horne <shorne@gmail.com>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/futex/include/logging.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/futex/include/logging.h b/tools/testing/selftests/futex/include/logging.h
index 014aa01197af..e14469103f07 100644
--- a/tools/testing/selftests/futex/include/logging.h
+++ b/tools/testing/selftests/futex/include/logging.h
@@ -21,6 +21,7 @@
 #ifndef _LOGGING_H
 #define _LOGGING_H
 
+#include <stdio.h>
 #include <string.h>
 #include <unistd.h>
 #include <linux/futex.h>
-- 
cgit v1.2.3


From 99c21f6d004d07667d7345e56e44a5b805e1dfa9 Mon Sep 17 00:00:00 2001
From: Stafford Horne <shorne@gmail.com>
Date: Thu, 12 Jan 2017 00:11:16 +0900
Subject: selftests/futex: Add headers to makefile dependencies

The futex makefile did not contain dependencies for all headers, so if
we make changes to logging.h rebuild will not happen. Add headers to
fix it up.

Signed-off-by: Stafford Horne <shorne@gmail.com>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/futex/functional/Makefile | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
index ac35782ce1e5..a648e7a6cbc3 100644
--- a/tools/testing/selftests/futex/functional/Makefile
+++ b/tools/testing/selftests/futex/functional/Makefile
@@ -2,7 +2,10 @@ INCLUDES := -I../include -I../../
 CFLAGS := $(CFLAGS) -g -O2 -Wall -D_GNU_SOURCE -pthread $(INCLUDES)
 LDFLAGS := $(LDFLAGS) -pthread -lrt
 
-HEADERS := ../include/futextest.h
+HEADERS := \
+	../include/futextest.h \
+	../include/atomic.h \
+	../include/logging.h
 TEST_GEN_FILES := \
 	futex_wait_timeout \
 	futex_wait_wouldblock \
-- 
cgit v1.2.3


From aff985fd07cf785fbd1544194f8fae08333e523e Mon Sep 17 00:00:00 2001
From: Stafford Horne <shorne@gmail.com>
Date: Thu, 12 Jan 2017 00:16:05 +0900
Subject: selftests/intel_pstate: Fix warning on loop index overflow

The build was showing the warning:
 aperf.c:60:27: warning: iteration 2147483647 invokes undefined behavior
 [-Waggressive-loop-optimizations]
  for (i=0; i<0x8fffffff; i++) {

This change sets i, cpu and fd to unsigned int as they should not need
to be signed.

Cc: Prarit Bhargava <prarit@redhat.com>
Signed-off-by: Stafford Horne <shorne@gmail.com>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/intel_pstate/aperf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/intel_pstate/aperf.c b/tools/testing/selftests/intel_pstate/aperf.c
index 6046e183f4ad..cd72f3dc83e9 100644
--- a/tools/testing/selftests/intel_pstate/aperf.c
+++ b/tools/testing/selftests/intel_pstate/aperf.c
@@ -14,7 +14,7 @@ void usage(char *name) {
 }
 
 int main(int argc, char **argv) {
-	int i, cpu, fd;
+	unsigned int i, cpu, fd;
 	char msr_file_name[64];
 	long long tsc, old_tsc, new_tsc;
 	long long aperf, old_aperf, new_aperf;
-- 
cgit v1.2.3


From 29fa1d436e96954ebcdac0beecb2e0d96c26350d Mon Sep 17 00:00:00 2001
From: Stafford Horne <shorne@gmail.com>
Date: Thu, 12 Jan 2017 00:16:06 +0900
Subject: selftests/intel_pstate: Update makefile to match new style

Recent changes from Bamvor (88baa78d1f318) have standardized the
variable names like TEST_GEN_FILES and removed the need for make targets
all and clean.

These changes bring the intel_pstate test inline with those changes.

Cc: Prarit Bhargava <prarit@redhat.com>
Signed-off-by: Stafford Horne <shorne@gmail.com>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/intel_pstate/Makefile | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/intel_pstate/Makefile b/tools/testing/selftests/intel_pstate/Makefile
index f5f1a28715ff..19678e90efb2 100644
--- a/tools/testing/selftests/intel_pstate/Makefile
+++ b/tools/testing/selftests/intel_pstate/Makefile
@@ -1,15 +1,10 @@
-CC := $(CROSS_COMPILE)gcc
 CFLAGS := $(CFLAGS) -Wall -D_GNU_SOURCE
 LDFLAGS := $(LDFLAGS) -lm
 
-TARGETS := msr aperf
+TEST_GEN_FILES := msr aperf
 
-TEST_PROGS := $(TARGETS) run.sh
+TEST_PROGS := run.sh
 
-.PHONY: all clean
-all: $(TARGETS)
+include ../lib.mk
 
-$(TARGETS): $(HEADERS)
-
-clean:
-	rm -f $(TARGETS)
+$(TEST_GEN_FILES): $(HEADERS)
-- 
cgit v1.2.3


From 6320303fb27ae46919c13b2f9ebf3f3a3ad63859 Mon Sep 17 00:00:00 2001
From: Stafford Horne <shorne@gmail.com>
Date: Thu, 12 Jan 2017 00:16:07 +0900
Subject: selftests: Add intel_pstate to TARGETS

This test was missing from the TARGETS list.  The test requires patches
to cpupower to pass correctly.

Cc: Prarit Bhargava <prarit@redhat.com>
Signed-off-by: Stafford Horne <shorne@gmail.com>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/Makefile | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 2741f9e35215..c6ccf57f2cb5 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -8,6 +8,7 @@ TARGETS += firmware
 TARGETS += ftrace
 TARGETS += futex
 TARGETS += gpio
+TARGETS += intel_pstate
 TARGETS += ipc
 TARGETS += kcmp
 TARGETS += lib
-- 
cgit v1.2.3


From e66d5b673741cf6b7da250da9f84a165b1e4342d Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 13 Jan 2017 12:06:45 +0530
Subject: selftest: cpufreq: Add support for cpufreq tests

This patch adds supports for basic cpufreq tests, which can be performed
independent of any platform.

It does basic tests for now, like
- reading all cpufreq files
- trying to update them
- switching frequencies
- switching governors

This can be extended to have more specific tests later on.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/Makefile            |   1 +
 tools/testing/selftests/cpufreq/Makefile    |   8 ++
 tools/testing/selftests/cpufreq/cpu.sh      |  84 ++++++++++++
 tools/testing/selftests/cpufreq/cpufreq.sh  | 201 ++++++++++++++++++++++++++++
 tools/testing/selftests/cpufreq/governor.sh | 146 ++++++++++++++++++++
 tools/testing/selftests/cpufreq/main.sh     | 128 ++++++++++++++++++
 6 files changed, 568 insertions(+)
 create mode 100644 tools/testing/selftests/cpufreq/Makefile
 create mode 100755 tools/testing/selftests/cpufreq/cpu.sh
 create mode 100755 tools/testing/selftests/cpufreq/cpufreq.sh
 create mode 100755 tools/testing/selftests/cpufreq/governor.sh
 create mode 100755 tools/testing/selftests/cpufreq/main.sh

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index c6ccf57f2cb5..e23bed2e2e94 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -1,6 +1,7 @@
 TARGETS =  bpf
 TARGETS += breakpoints
 TARGETS += capabilities
+TARGETS += cpufreq
 TARGETS += cpu-hotplug
 TARGETS += efivarfs
 TARGETS += exec
diff --git a/tools/testing/selftests/cpufreq/Makefile b/tools/testing/selftests/cpufreq/Makefile
new file mode 100644
index 000000000000..f5c6bb1965a1
--- /dev/null
+++ b/tools/testing/selftests/cpufreq/Makefile
@@ -0,0 +1,8 @@
+all:
+
+TEST_PROGS := main.sh
+TEST_FILES := cpu.sh cpufreq.sh governor.sh
+
+include ../lib.mk
+
+clean:
diff --git a/tools/testing/selftests/cpufreq/cpu.sh b/tools/testing/selftests/cpufreq/cpu.sh
new file mode 100755
index 000000000000..8e08a83d65f2
--- /dev/null
+++ b/tools/testing/selftests/cpufreq/cpu.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+#
+# CPU helpers
+
+# protect against multiple inclusion
+if [ $FILE_CPU ]; then
+	return 0
+else
+	FILE_CPU=DONE
+fi
+
+source cpufreq.sh
+
+for_each_cpu()
+{
+	cpus=$(ls $CPUROOT | grep "cpu[0-9].*")
+	for cpu in $cpus; do
+		$@ $cpu
+	done
+}
+
+for_each_non_boot_cpu()
+{
+	cpus=$(ls $CPUROOT | grep "cpu[1-9].*")
+	for cpu in $cpus; do
+		$@ $cpu
+	done
+}
+
+#$1: cpu
+offline_cpu()
+{
+	printf "Offline $1\n"
+	echo 0 > $CPUROOT/$1/online
+}
+
+#$1: cpu
+online_cpu()
+{
+	printf "Online $1\n"
+	echo 1 > $CPUROOT/$1/online
+}
+
+#$1: cpu
+reboot_cpu()
+{
+	offline_cpu $1
+	online_cpu $1
+}
+
+# Reboot CPUs
+# param: number of times we want to run the loop
+reboot_cpus()
+{
+	printf "** Test: Running ${FUNCNAME[0]} for $1 loops **\n\n"
+
+	for i in `seq 1 $1`; do
+		for_each_non_boot_cpu offline_cpu
+		for_each_non_boot_cpu online_cpu
+		printf "\n"
+	done
+
+	printf "\n%s\n\n" "------------------------------------------------"
+}
+
+# Prints warning for all CPUs with missing cpufreq directory
+print_unmanaged_cpus()
+{
+	for_each_cpu cpu_should_have_cpufreq_directory
+}
+
+# Counts CPUs with cpufreq directories
+count_cpufreq_managed_cpus()
+{
+	count=0;
+
+	for cpu in `ls $CPUROOT | grep "cpu[0-9].*"`; do
+		if [ -d $CPUROOT/$cpu/cpufreq ]; then
+			let count=count+1;
+		fi
+	done
+
+	echo $count;
+}
diff --git a/tools/testing/selftests/cpufreq/cpufreq.sh b/tools/testing/selftests/cpufreq/cpufreq.sh
new file mode 100755
index 000000000000..2b8b05aaa874
--- /dev/null
+++ b/tools/testing/selftests/cpufreq/cpufreq.sh
@@ -0,0 +1,201 @@
+#!/bin/bash
+
+# protect against multiple inclusion
+if [ $FILE_CPUFREQ ]; then
+	return 0
+else
+	FILE_CPUFREQ=DONE
+fi
+
+source cpu.sh
+
+
+# $1: cpu
+cpu_should_have_cpufreq_directory()
+{
+	if [ ! -d $CPUROOT/$1/cpufreq ]; then
+		printf "Warning: No cpufreq directory present for $1\n"
+	fi
+}
+
+cpu_should_not_have_cpufreq_directory()
+{
+	if [ -d $CPUROOT/$1/cpufreq ]; then
+		printf "Warning: cpufreq directory present for $1\n"
+	fi
+}
+
+for_each_policy()
+{
+	policies=$(ls $CPUFREQROOT| grep "policy[0-9].*")
+	for policy in $policies; do
+		$@ $policy
+	done
+}
+
+for_each_policy_concurrent()
+{
+	policies=$(ls $CPUFREQROOT| grep "policy[0-9].*")
+	for policy in $policies; do
+		$@ $policy &
+	done
+}
+
+# $1: Path
+read_cpufreq_files_in_dir()
+{
+	local files=`ls $1`
+
+	printf "Printing directory: $1\n\n"
+
+	for file in $files; do
+		if [ -f $1/$file ]; then
+			printf "$file:"
+			cat $1/$file
+		else
+			printf "\n"
+			read_cpufreq_files_in_dir "$1/$file"
+		fi
+	done
+	printf "\n"
+}
+
+
+read_all_cpufreq_files()
+{
+	printf "** Test: Running ${FUNCNAME[0]} **\n\n"
+
+	read_cpufreq_files_in_dir $CPUFREQROOT
+
+	printf "%s\n\n" "------------------------------------------------"
+}
+
+
+# UPDATE CPUFREQ FILES
+
+# $1: directory path
+update_cpufreq_files_in_dir()
+{
+	local files=`ls $1`
+
+	printf "Updating directory: $1\n\n"
+
+	for file in $files; do
+		if [ -f $1/$file ]; then
+			# file is writable ?
+			local wfile=$(ls -l $1/$file | awk '$1 ~ /^.*w.*/ { print $NF; }')
+
+			if [ ! -z $wfile ]; then
+				# scaling_setspeed is a special file and we
+				# should skip updating it
+				if [ $file != "scaling_setspeed" ]; then
+					local val=$(cat $1/$file)
+					printf "Writing $val to: $file\n"
+					echo $val > $1/$file
+				fi
+			fi
+		else
+			printf "\n"
+			update_cpufreq_files_in_dir "$1/$file"
+		fi
+	done
+
+	printf "\n"
+}
+
+# Update all writable files with their existing values
+update_all_cpufreq_files()
+{
+	printf "** Test: Running ${FUNCNAME[0]} **\n\n"
+
+	update_cpufreq_files_in_dir $CPUFREQROOT
+
+	printf "%s\n\n" "------------------------------------------------"
+}
+
+
+# CHANGE CPU FREQUENCIES
+
+# $1: policy
+find_current_freq()
+{
+	cat $CPUFREQROOT/$1/scaling_cur_freq
+}
+
+# $1: policy
+# $2: frequency
+set_cpu_frequency()
+{
+	printf "Change frequency for $1 to $2\n"
+	echo $2 > $CPUFREQROOT/$1/scaling_setspeed
+}
+
+# $1: policy
+test_all_frequencies()
+{
+	local filepath="$CPUFREQROOT/$1"
+
+	backup_governor $1
+
+	local found=$(switch_governor $1 "userspace")
+	if [ $found = 1 ]; then
+		printf "${FUNCNAME[0]}: userspace governor not available for: $1\n"
+		return;
+	fi
+
+	printf "Switched governor for $1 to userspace\n\n"
+
+	local freqs=$(cat $filepath/scaling_available_frequencies)
+	printf "Available frequencies for $1: $freqs\n\n"
+
+	# Set all frequencies one-by-one
+	for freq in $freqs; do
+		set_cpu_frequency $1 $freq
+	done
+
+	printf "\n"
+
+	restore_governor $1
+}
+
+# $1: loop count
+shuffle_frequency_for_all_cpus()
+{
+	printf "** Test: Running ${FUNCNAME[0]} for $1 loops **\n\n"
+
+	for i in `seq 1 $1`; do
+		for_each_policy test_all_frequencies
+	done
+	printf "\n%s\n\n" "------------------------------------------------"
+}
+
+# Basic cpufreq tests
+cpufreq_basic_tests()
+{
+	printf "*** RUNNING CPUFREQ SANITY TESTS ***\n"
+	printf "====================================\n\n"
+
+	count=$(count_cpufreq_managed_cpus)
+	if [ $count = 0 ]; then
+		printf "No cpu is managed by cpufreq core, exiting\n"
+		exit;
+	else
+		printf "CPUFreq manages: $count CPUs\n\n"
+	fi
+
+	# Detect & print which CPUs are not managed by cpufreq
+	print_unmanaged_cpus
+
+	# read/update all cpufreq files
+	read_all_cpufreq_files
+	update_all_cpufreq_files
+
+	# hotplug cpus
+	reboot_cpus 5
+
+	# Test all frequencies
+	shuffle_frequency_for_all_cpus 2
+
+	# Test all governors
+	shuffle_governors_for_all_cpus 1
+}
diff --git a/tools/testing/selftests/cpufreq/governor.sh b/tools/testing/selftests/cpufreq/governor.sh
new file mode 100755
index 000000000000..2e42432892ab
--- /dev/null
+++ b/tools/testing/selftests/cpufreq/governor.sh
@@ -0,0 +1,146 @@
+#!/bin/bash
+#
+# Test governors
+
+# protect against multiple inclusion
+if [ $FILE_GOVERNOR ]; then
+	return 0
+else
+	FILE_GOVERNOR=DONE
+fi
+
+source cpu.sh
+source cpufreq.sh
+
+CUR_GOV=
+CUR_FREQ=
+
+# Find governor's directory path
+# $1: policy, $2: governor
+find_gov_directory()
+{
+	if [ -d $CPUFREQROOT/$2 ]; then
+		printf "$CPUFREQROOT/$2\n"
+	elif [ -d $CPUFREQROOT/$1/$2 ]; then
+		printf "$CPUFREQROOT/$1/$2\n"
+	else
+		printf "INVALID\n"
+	fi
+}
+
+# $1: policy
+find_current_governor()
+{
+	cat $CPUFREQROOT/$1/scaling_governor
+}
+
+# $1: policy
+backup_governor()
+{
+	CUR_GOV=$(find_current_governor $1)
+
+	printf "Governor backup done for $1: $CUR_GOV\n"
+
+	if [ $CUR_GOV == "userspace" ]; then
+		CUR_FREQ=$(find_current_freq $1)
+		printf "Governor frequency backup done for $1: $CUR_FREQ\n"
+	fi
+
+	printf "\n"
+}
+
+# $1: policy
+restore_governor()
+{
+	__switch_governor $1 $CUR_GOV
+
+	printf "Governor restored for $1 to $CUR_GOV\n"
+
+	if [ $CUR_GOV == "userspace" ]; then
+		set_cpu_frequency $1 $CUR_FREQ
+		printf "Governor frequency restored for $1: $CUR_FREQ\n"
+	fi
+
+	printf "\n"
+}
+
+# param:
+# $1: policy, $2: governor
+__switch_governor()
+{
+	echo $2 > $CPUFREQROOT/$1/scaling_governor
+}
+
+# SWITCH GOVERNORS
+
+# $1: cpu, $2: governor
+switch_governor()
+{
+	local filepath=$CPUFREQROOT/$1/scaling_available_governors
+
+	# check if governor is available
+	local found=$(cat $filepath | grep $2 | wc -l)
+	if [ $found = 0 ]; then
+		echo 1;
+		return
+	fi
+
+	__switch_governor $1 $2
+	echo 0;
+}
+
+# $1: policy, $2: governor
+switch_show_governor()
+{
+	cur_gov=find_current_governor
+	if [ $cur_gov == "userspace" ]; then
+		cur_freq=find_current_freq
+	fi
+
+	# switch governor
+	__switch_governor $1 $2
+
+	printf "\nSwitched governor for $1 to $2\n\n"
+
+	if [ $2 == "userspace" -o $2 == "powersave" -o $2 == "performance" ]; then
+		printf "No files to read for $2 governor\n\n"
+		return
+	fi
+
+	# show governor files
+	local govpath=$(find_gov_directory $1 $2)
+	read_cpufreq_files_in_dir $govpath
+}
+
+# $1: function to be called, $2: policy
+call_for_each_governor()
+{
+	local filepath=$CPUFREQROOT/$2/scaling_available_governors
+
+	# Exit if cpu isn't managed by cpufreq core
+	if [ ! -f $filepath ]; then
+		return;
+	fi
+
+	backup_governor $2
+
+	local governors=$(cat $filepath)
+	printf "Available governors for $2: $governors\n"
+
+	for governor in $governors; do
+		$1 $2 $governor
+	done
+
+	restore_governor $2
+}
+
+# $1: loop count
+shuffle_governors_for_all_cpus()
+{
+	printf "** Test: Running ${FUNCNAME[0]} for $1 loops **\n\n"
+
+	for i in `seq 1 $1`; do
+		for_each_policy call_for_each_governor switch_show_governor
+	done
+	printf "%s\n\n" "------------------------------------------------"
+}
diff --git a/tools/testing/selftests/cpufreq/main.sh b/tools/testing/selftests/cpufreq/main.sh
new file mode 100755
index 000000000000..3224652ccbd4
--- /dev/null
+++ b/tools/testing/selftests/cpufreq/main.sh
@@ -0,0 +1,128 @@
+#!/bin/bash
+
+source cpu.sh
+source cpufreq.sh
+source governor.sh
+
+FUNC=basic	# do basic tests by default
+OUTFILE=cpufreq_selftest
+SYSFS=
+CPUROOT=
+CPUFREQROOT=
+
+helpme()
+{
+	printf "Usage: $0 [-h] [-to args]
+	[-h <help>]
+	[-o <output-file-for-dump>]
+	[-t <basic: Basic cpufreq testing>]
+	\n"
+	exit 2
+}
+
+prerequisite()
+{
+	msg="skip all tests:"
+
+	if [ $UID != 0 ]; then
+		echo $msg must be run as root >&2
+		exit 2
+	fi
+
+	taskset -p 01 $$
+
+	SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`
+
+	if [ ! -d "$SYSFS" ]; then
+		echo $msg sysfs is not mounted >&2
+		exit 2
+	fi
+
+	CPUROOT=$SYSFS/devices/system/cpu
+	CPUFREQROOT="$CPUROOT/cpufreq"
+
+	if ! ls $CPUROOT/cpu* > /dev/null 2>&1; then
+		echo $msg cpus not available in sysfs >&2
+		exit 2
+	fi
+
+	if ! ls $CPUROOT/cpufreq > /dev/null 2>&1; then
+		echo $msg cpufreq directory not available in sysfs >&2
+		exit 2
+	fi
+}
+
+parse_arguments()
+{
+	while getopts ht:o: arg
+	do
+		case $arg in
+			h) # --help
+				helpme
+				;;
+
+			t) # --func_type (Function to perform: basic (default: basic))
+				FUNC=$OPTARG
+				;;
+
+			o) # --output-file (Output file to store dumps)
+				OUTFILE=$OPTARG
+				;;
+
+			\?)
+				helpme
+				;;
+		esac
+	done
+}
+
+do_test()
+{
+	# Check if CPUs are managed by cpufreq or not
+	count=$(count_cpufreq_managed_cpus)
+
+	if [ $count = 0 ]; then
+		echo "No cpu is managed by cpufreq core, exiting"
+		exit 2;
+	fi
+
+	case "$FUNC" in
+		"basic")
+		cpufreq_basic_tests
+		;;
+
+		*)
+		echo "Invalid [-f] function type"
+		helpme
+		;;
+	esac
+}
+
+# clear dumps
+# $1: file name
+clear_dumps()
+{
+	echo "" > $1.txt
+	echo "" > $1.dmesg_cpufreq.txt
+	echo "" > $1.dmesg_full.txt
+}
+
+# $1: output file name
+dmesg_dumps()
+{
+	dmesg | grep cpufreq >> $1.dmesg_cpufreq.txt
+
+	# We may need the full logs as well
+	dmesg >> $1.dmesg_full.txt
+}
+
+# Parse arguments
+parse_arguments $@
+
+# Make sure all requirements are met
+prerequisite
+
+# Run requested functions
+clear_dumps $OUTFILE
+do_test >> $OUTFILE.txt
+dmesg_dumps $OUTFILE
-- 
cgit v1.2.3


From b03eaf8dbac5534590ec52612f789d8fb292af9c Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 13 Jan 2017 12:06:46 +0530
Subject: selftest: cpufreq: Add suspend/resume/hibernate support

This patch adds support to test basic suspend/resume and hibernation to
the cpufreq selftests.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/cpufreq/cpufreq.sh | 40 ++++++++++++++++++++++++++++++
 tools/testing/selftests/cpufreq/main.sh    | 14 +++++++++--
 2 files changed, 52 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/cpufreq/cpufreq.sh b/tools/testing/selftests/cpufreq/cpufreq.sh
index 2b8b05aaa874..1ed3832030b4 100755
--- a/tools/testing/selftests/cpufreq/cpufreq.sh
+++ b/tools/testing/selftests/cpufreq/cpufreq.sh
@@ -199,3 +199,43 @@ cpufreq_basic_tests()
 	# Test all governors
 	shuffle_governors_for_all_cpus 1
 }
+
+# Suspend/resume
+# $1: "suspend" or "hibernate", $2: loop count
+do_suspend()
+{
+	printf "** Test: Running ${FUNCNAME[0]}: Trying $1 for $2 loops **\n\n"
+
+	# Is the directory available
+	if [ ! -d $SYSFS/power/ -o ! -f $SYSFS/power/state ]; then
+		printf "$SYSFS/power/state not available\n"
+		return 1
+	fi
+
+	if [ $1 = "suspend" ]; then
+		filename="mem"
+	elif [ $1 = "hibernate" ]; then
+		filename="disk"
+	else
+		printf "$1 is not a valid option\n"
+		return 1
+	fi
+
+	if [ -n $filename ]; then
+		present=$(cat $SYSFS/power/state | grep $filename)
+
+		if [ -z "$present" ]; then
+			printf "Tried to $1 but $filename isn't present in $SYSFS/power/state\n"
+			return 1;
+		fi
+
+		for i in `seq 1 $2`; do
+			printf "Starting $1\n"
+			echo $filename > $SYSFS/power/state
+			printf "Came out of $1\n"
+
+			printf "Do basic tests after finishing $1 to verify cpufreq state\n\n"
+			cpufreq_basic_tests
+		done
+	fi
+}
diff --git a/tools/testing/selftests/cpufreq/main.sh b/tools/testing/selftests/cpufreq/main.sh
index 3224652ccbd4..9ff662f67ea4 100755
--- a/tools/testing/selftests/cpufreq/main.sh
+++ b/tools/testing/selftests/cpufreq/main.sh
@@ -15,7 +15,9 @@ helpme()
 	printf "Usage: $0 [-h] [-to args]
 	[-h <help>]
 	[-o <output-file-for-dump>]
-	[-t <basic: Basic cpufreq testing>]
+	[-t <basic: Basic cpufreq testing
+	     suspend: suspend/resume,
+	     hibernate: hibernate/resume>]
 	\n"
 	exit 2
 }
@@ -61,7 +63,7 @@ parse_arguments()
 				helpme
 				;;
 
-			t) # --func_type (Function to perform: basic (default: basic))
+			t) # --func_type (Function to perform: basic, suspend, hibernate (default: basic))
 				FUNC=$OPTARG
 				;;
 
@@ -91,6 +93,14 @@ do_test()
 		cpufreq_basic_tests
 		;;
 
+		"suspend")
+		do_suspend "suspend" 1
+		;;
+
+		"hibernate")
+		do_suspend "hibernate" 1
+		;;
+
 		*)
 		echo "Invalid [-f] function type"
 		helpme
-- 
cgit v1.2.3


From 6751faf3d8338d525e8d9c35ae87cbfed48ce958 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 13 Jan 2017 12:06:47 +0530
Subject: selftest: cpufreq: Add support to test cpufreq modules

This patch adds support for cpufreq modules like cpufreq drivers and
cpufreq governors. The tests will insert the modules in different orders
and them perform basic cpufreq tests. The modules are then removed from
the kernel.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/cpufreq/Makefile  |   2 +-
 tools/testing/selftests/cpufreq/main.sh   |  45 +++++-
 tools/testing/selftests/cpufreq/module.sh | 243 ++++++++++++++++++++++++++++++
 3 files changed, 284 insertions(+), 6 deletions(-)
 create mode 100755 tools/testing/selftests/cpufreq/module.sh

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/cpufreq/Makefile b/tools/testing/selftests/cpufreq/Makefile
index f5c6bb1965a1..80c8727dcec1 100644
--- a/tools/testing/selftests/cpufreq/Makefile
+++ b/tools/testing/selftests/cpufreq/Makefile
@@ -1,7 +1,7 @@
 all:
 
 TEST_PROGS := main.sh
-TEST_FILES := cpu.sh cpufreq.sh governor.sh
+TEST_FILES := cpu.sh cpufreq.sh governor.sh module.sh
 
 include ../lib.mk
 
diff --git a/tools/testing/selftests/cpufreq/main.sh b/tools/testing/selftests/cpufreq/main.sh
index 9ff662f67ea4..2515867ac9de 100755
--- a/tools/testing/selftests/cpufreq/main.sh
+++ b/tools/testing/selftests/cpufreq/main.sh
@@ -3,6 +3,7 @@
 source cpu.sh
 source cpufreq.sh
 source governor.sh
+source module.sh
 
 FUNC=basic	# do basic tests by default
 OUTFILE=cpufreq_selftest
@@ -12,12 +13,15 @@ CPUFREQROOT=
 
 helpme()
 {
-	printf "Usage: $0 [-h] [-to args]
+	printf "Usage: $0 [-h] [-todg args]
 	[-h <help>]
 	[-o <output-file-for-dump>]
 	[-t <basic: Basic cpufreq testing
 	     suspend: suspend/resume,
-	     hibernate: hibernate/resume>]
+	     hibernate: hibernate/resume,
+	     modtest: test driver or governor modules. Only to be used with -d or -g options>]
+	[-d <driver's module name: only with \"-t modtest>\"]
+	[-g <governor's module name: only with \"-t modtest>\"]
 	\n"
 	exit 2
 }
@@ -56,14 +60,14 @@ prerequisite()
 
 parse_arguments()
 {
-	while getopts ht:o: arg
+	while getopts ht:o:d:g: arg
 	do
 		case $arg in
 			h) # --help
 				helpme
 				;;
 
-			t) # --func_type (Function to perform: basic, suspend, hibernate (default: basic))
+			t) # --func_type (Function to perform: basic, suspend, hibernate, modtest (default: basic))
 				FUNC=$OPTARG
 				;;
 
@@ -71,6 +75,14 @@ parse_arguments()
 				OUTFILE=$OPTARG
 				;;
 
+			d) # --driver-mod-name (Name of the driver module)
+				DRIVER_MOD=$OPTARG
+				;;
+
+			g) # --governor-mod-name (Name of the governor module)
+				GOVERNOR_MOD=$OPTARG
+				;;
+
 			\?)
 				helpme
 				;;
@@ -83,7 +95,7 @@ do_test()
 	# Check if CPUs are managed by cpufreq or not
 	count=$(count_cpufreq_managed_cpus)
 
-	if [ $count = 0 ]; then
+	if [ $count = 0 -a $FUNC != "modtest" ]; then
 		echo "No cpu is managed by cpufreq core, exiting"
 		exit 2;
 	fi
@@ -101,6 +113,29 @@ do_test()
 		do_suspend "hibernate" 1
 		;;
 
+		"modtest")
+		# Do we have modules in place?
+		if [ -z $DRIVER_MOD ] && [ -z $GOVERNOR_MOD ]; then
+			echo "No driver or governor module passed with -d or -g"
+			exit 2;
+		fi
+
+		if [ $DRIVER_MOD ]; then
+			if [ $GOVERNOR_MOD ]; then
+				module_test $DRIVER_MOD $GOVERNOR_MOD
+			else
+				module_driver_test $DRIVER_MOD
+			fi
+		else
+			if [ $count = 0 ]; then
+				echo "No cpu is managed by cpufreq core, exiting"
+				exit 2;
+			fi
+
+			module_governor_test $GOVERNOR_MOD
+		fi
+		;;
+
 		*)
 		echo "Invalid [-f] function type"
 		helpme
diff --git a/tools/testing/selftests/cpufreq/module.sh b/tools/testing/selftests/cpufreq/module.sh
new file mode 100755
index 000000000000..8ff2244a33a1
--- /dev/null
+++ b/tools/testing/selftests/cpufreq/module.sh
@@ -0,0 +1,243 @@
+#!/bin/bash
+#
+# Modules specific tests cases
+
+# protect against multiple inclusion
+if [ $FILE_MODULE ]; then
+	return 0
+else
+	FILE_MODULE=DONE
+fi
+
+source cpu.sh
+source cpufreq.sh
+source governor.sh
+
+# Check basic insmod/rmmod
+# $1: module
+test_basic_insmod_rmmod()
+{
+	printf "** Test: Running ${FUNCNAME[0]} **\n\n"
+
+	printf "Inserting $1 module\n"
+	# insert module
+	insmod $1
+	if [ $? != 0 ]; then
+		printf "Insmod $1 failed\n"
+		exit;
+	fi
+
+	printf "Removing $1 module\n"
+	# remove module
+	rmmod $1
+	if [ $? != 0 ]; then
+		printf "rmmod $1 failed\n"
+		exit;
+	fi
+
+	printf "\n"
+}
+
+# Insert cpufreq driver module and perform basic tests
+# $1: cpufreq-driver module to insert
+# $2: If we want to play with CPUs (1) or not (0)
+module_driver_test_single()
+{
+	printf "** Test: Running ${FUNCNAME[0]} for driver $1 and cpus_hotplug=$2 **\n\n"
+
+	if [ $2 -eq 1 ]; then
+		# offline all non-boot CPUs
+		for_each_non_boot_cpu offline_cpu
+		printf "\n"
+	fi
+
+	# insert module
+	printf "Inserting $1 module\n\n"
+	insmod $1
+	if [ $? != 0 ]; then
+		printf "Insmod $1 failed\n"
+		return;
+	fi
+
+	if [ $2 -eq 1 ]; then
+		# online all non-boot CPUs
+		for_each_non_boot_cpu online_cpu
+		printf "\n"
+	fi
+
+	# run basic tests
+	cpufreq_basic_tests
+
+	# remove module
+	printf "Removing $1 module\n\n"
+	rmmod $1
+	if [ $? != 0 ]; then
+		printf "rmmod $1 failed\n"
+		return;
+	fi
+
+	# There shouldn't be any cpufreq directories now.
+	for_each_cpu cpu_should_not_have_cpufreq_directory
+	printf "\n"
+}
+
+# $1: cpufreq-driver module to insert
+module_driver_test()
+{
+	printf "** Test: Running ${FUNCNAME[0]} **\n\n"
+
+	# check if module is present or not
+	ls $1 > /dev/null
+	if [ $? != 0 ]; then
+		printf "$1: not present in `pwd` folder\n"
+		return;
+	fi
+
+	# test basic module tests
+	test_basic_insmod_rmmod $1
+
+	# Do simple module test
+	module_driver_test_single $1 0
+
+	# Remove CPUs before inserting module and then bring them back
+	module_driver_test_single $1 1
+	printf "\n"
+}
+
+# find governor name based on governor module name
+# $1: governor module name
+find_gov_name()
+{
+	if [ $1 = "cpufreq_ondemand.ko" ]; then
+		printf "ondemand"
+	elif [ $1 = "cpufreq_conservative.ko" ]; then
+		printf "conservative"
+	elif [ $1 = "cpufreq_userspace.ko" ]; then
+		printf "userspace"
+	elif [ $1 = "cpufreq_performance.ko" ]; then
+		printf "performance"
+	elif [ $1 = "cpufreq_powersave.ko" ]; then
+		printf "powersave"
+	elif [ $1 = "cpufreq_schedutil.ko" ]; then
+		printf "schedutil"
+	fi
+}
+
+# $1: governor string, $2: governor module, $3: policy
+# example: module_governor_test_single "ondemand" "cpufreq_ondemand.ko" 2
+module_governor_test_single()
+{
+	printf "** Test: Running ${FUNCNAME[0]} for $3 **\n\n"
+
+	backup_governor $3
+
+	# switch to new governor
+	printf "Switch from $CUR_GOV to $1\n"
+	switch_show_governor $3 $1
+
+	# try removing module, it should fail as governor is used
+	printf "Removing $2 module\n\n"
+	rmmod $2
+	if [ $? = 0 ]; then
+		printf "WARN: rmmod $2 succeeded even if governor is used\n"
+		insmod $2
+	else
+		printf "Pass: unable to remove $2 while it is being used\n\n"
+	fi
+
+	# switch back to old governor
+	printf "Switchback to $CUR_GOV from $1\n"
+	restore_governor $3
+	printf "\n"
+}
+
+# Insert cpufreq governor module and perform basic tests
+# $1: cpufreq-governor module to insert
+module_governor_test()
+{
+	printf "** Test: Running ${FUNCNAME[0]} **\n\n"
+
+	# check if module is present or not
+	ls $1 > /dev/null
+	if [ $? != 0 ]; then
+		printf "$1: not present in `pwd` folder\n"
+		return;
+	fi
+
+	# test basic module tests
+	test_basic_insmod_rmmod $1
+
+	# insert module
+	printf "Inserting $1 module\n\n"
+	insmod $1
+	if [ $? != 0 ]; then
+		printf "Insmod $1 failed\n"
+		return;
+	fi
+
+	# switch to new governor for each cpu
+	for_each_policy module_governor_test_single $(find_gov_name $1) $1
+
+	# remove module
+	printf "Removing $1 module\n\n"
+	rmmod $1
+	if [ $? != 0 ]; then
+		printf "rmmod $1 failed\n"
+		return;
+	fi
+	printf "\n"
+}
+
+# test modules: driver and governor
+# $1: driver module, $2: governor module
+module_test()
+{
+	printf "** Test: Running ${FUNCNAME[0]} **\n\n"
+
+	# check if modules are present or not
+	ls $1 $2 > /dev/null
+	if [ $? != 0 ]; then
+		printf "$1 or $2: is not present in `pwd` folder\n"
+		return;
+	fi
+
+	# TEST1: Insert gov after driver
+	# insert driver module
+	printf "Inserting $1 module\n\n"
+	insmod $1
+	if [ $? != 0 ]; then
+		printf "Insmod $1 failed\n"
+		return;
+	fi
+
+	# run governor tests
+	module_governor_test $2
+
+	# remove driver module
+	printf "Removing $1 module\n\n"
+	rmmod $1
+	if [ $? != 0 ]; then
+		printf "rmmod $1 failed\n"
+		return;
+	fi
+
+	# TEST2: Insert driver after governor
+	# insert governor module
+	printf "Inserting $2 module\n\n"
+	insmod $2
+	if [ $? != 0 ]; then
+		printf "Insmod $2 failed\n"
+		return;
+	fi
+
+	# run governor tests
+	module_driver_test $1
+
+	# remove driver module
+	printf "Removing $2 module\n\n"
+	rmmod $2
+	if [ $? != 0 ]; then
+		printf "rmmod $2 failed\n"
+		return;
+	fi
+}
-- 
cgit v1.2.3


From 1e4c2830c36914feb7d03916e21f7c2e45dc2055 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 13 Jan 2017 12:06:48 +0530
Subject: selftest: cpufreq: Add special tests

This patch adds support for special tests which were reported on the PM
list over the years, which helped catching core bugs by several
developers.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/cpufreq/Makefile         |   2 +-
 tools/testing/selftests/cpufreq/governor.sh      |   7 ++
 tools/testing/selftests/cpufreq/main.sh          |  25 ++++-
 tools/testing/selftests/cpufreq/special-tests.sh | 115 +++++++++++++++++++++++
 4 files changed, 146 insertions(+), 3 deletions(-)
 create mode 100755 tools/testing/selftests/cpufreq/special-tests.sh

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/cpufreq/Makefile b/tools/testing/selftests/cpufreq/Makefile
index 80c8727dcec1..3955cd96f3a2 100644
--- a/tools/testing/selftests/cpufreq/Makefile
+++ b/tools/testing/selftests/cpufreq/Makefile
@@ -1,7 +1,7 @@
 all:
 
 TEST_PROGS := main.sh
-TEST_FILES := cpu.sh cpufreq.sh governor.sh module.sh
+TEST_FILES := cpu.sh cpufreq.sh governor.sh module.sh special-tests.sh
 
 include ../lib.mk
 
diff --git a/tools/testing/selftests/cpufreq/governor.sh b/tools/testing/selftests/cpufreq/governor.sh
index 2e42432892ab..def645103555 100755
--- a/tools/testing/selftests/cpufreq/governor.sh
+++ b/tools/testing/selftests/cpufreq/governor.sh
@@ -71,6 +71,13 @@ __switch_governor()
 	echo $2 > $CPUFREQROOT/$1/scaling_governor
 }
 
+# param:
+# $1: cpu, $2: governor
+__switch_governor_for_cpu()
+{
+	echo $2 > $CPUROOT/$1/cpufreq/scaling_governor
+}
+
 # SWITCH GOVERNORS
 
 # $1: cpu, $2: governor
diff --git a/tools/testing/selftests/cpufreq/main.sh b/tools/testing/selftests/cpufreq/main.sh
index 2515867ac9de..01bac76ac0ec 100755
--- a/tools/testing/selftests/cpufreq/main.sh
+++ b/tools/testing/selftests/cpufreq/main.sh
@@ -4,6 +4,7 @@ source cpu.sh
 source cpufreq.sh
 source governor.sh
 source module.sh
+source special-tests.sh
 
 FUNC=basic	# do basic tests by default
 OUTFILE=cpufreq_selftest
@@ -19,7 +20,11 @@ helpme()
 	[-t <basic: Basic cpufreq testing
 	     suspend: suspend/resume,
 	     hibernate: hibernate/resume,
-	     modtest: test driver or governor modules. Only to be used with -d or -g options>]
+	     modtest: test driver or governor modules. Only to be used with -d or -g options,
+	     sptest1: Simple governor switch to produce lockdep.
+	     sptest2: Concurrent governor switch to produce lockdep.
+	     sptest3: Governor races, shuffle between governors quickly.
+	     sptest4: CPU hotplugs with updates to cpufreq files.>]
 	[-d <driver's module name: only with \"-t modtest>\"]
 	[-g <governor's module name: only with \"-t modtest>\"]
 	\n"
@@ -67,7 +72,7 @@ parse_arguments()
 				helpme
 				;;
 
-			t) # --func_type (Function to perform: basic, suspend, hibernate, modtest (default: basic))
+			t) # --func_type (Function to perform: basic, suspend, hibernate, modtest, sptest1/2/3/4 (default: basic))
 				FUNC=$OPTARG
 				;;
 
@@ -136,6 +141,22 @@ do_test()
 		fi
 		;;
 
+		"sptest1")
+		simple_lockdep
+		;;
+
+		"sptest2")
+		concurrent_lockdep
+		;;
+
+		"sptest3")
+		governor_race
+		;;
+
+		"sptest4")
+		hotplug_with_updates
+		;;
+
 		*)
 		echo "Invalid [-f] function type"
 		helpme
diff --git a/tools/testing/selftests/cpufreq/special-tests.sh b/tools/testing/selftests/cpufreq/special-tests.sh
new file mode 100755
index 000000000000..58b730f23ef7
--- /dev/null
+++ b/tools/testing/selftests/cpufreq/special-tests.sh
@@ -0,0 +1,115 @@
+#!/bin/bash
+#
+# Special test cases reported by people
+
+# Testcase 1: Reported here: http://marc.info/?l=linux-pm&m=140618592709858&w=2
+
+# protect against multiple inclusion
+if [ $FILE_SPECIAL ]; then
+	return 0
+else
+	FILE_SPECIAL=DONE
+fi
+
+source cpu.sh
+source cpufreq.sh
+source governor.sh
+
+# Test 1
+# $1: policy
+__simple_lockdep()
+{
+	# switch to ondemand
+	__switch_governor $1 "ondemand"
+
+	# cat ondemand files
+	local ondir=$(find_gov_directory $1 "ondemand")
+	if [ -z $ondir ]; then
+		printf "${FUNCNAME[0]}Ondemand directory not created, quit"
+		return
+	fi
+
+	cat $ondir/*
+
+	# switch to conservative
+	__switch_governor $1 "conservative"
+}
+
+simple_lockdep()
+{
+	printf "** Test: Running ${FUNCNAME[0]} **\n"
+
+	for_each_policy __simple_lockdep
+}
+
+# Test 2
+# $1: policy
+__concurrent_lockdep()
+{
+	for i in `seq 0 100`; do
+		__simple_lockdep $1
+	done
+}
+
+concurrent_lockdep()
+{
+	printf "** Test: Running ${FUNCNAME[0]} **\n"
+
+	for_each_policy_concurrent __concurrent_lockdep
+}
+
+# Test 3
+quick_shuffle()
+{
+	# this is called concurrently from governor_race
+	for I in `seq 1000`
+	do
+		echo ondemand | sudo tee $CPUFREQROOT/policy*/scaling_governor &
+		echo userspace | sudo tee $CPUFREQROOT/policy*/scaling_governor &
+	done
+}
+
+governor_race()
+{
+	printf "** Test: Running ${FUNCNAME[0]} **\n"
+
+	# run 8 concurrent instances
+	for I in `seq 8`
+	do
+		quick_shuffle &
+	done
+}
+
+# Test 4
+# $1: cpu
+hotplug_with_updates_cpu()
+{
+	local filepath="$CPUROOT/$1/cpufreq"
+
+	# switch to ondemand
+	__switch_governor_for_cpu $1 "ondemand"
+
+	for i in `seq 1 5000`
+	do
+		reboot_cpu $1
+	done &
+
+	local freqs=$(cat $filepath/scaling_available_frequencies)
+	local oldfreq=$(cat $filepath/scaling_min_freq)
+
+	for j in `seq 1 5000`
+	do
+		# Set all frequencies one-by-one
+		for freq in $freqs; do
+			echo $freq > $filepath/scaling_min_freq
+		done
+	done
+
+	# restore old freq
+	echo $oldfreq > $filepath/scaling_min_freq
+}
+
+hotplug_with_updates()
+{
+	for_each_non_boot_cpu hotplug_with_updates_cpu
+}
-- 
cgit v1.2.3


From 4d3381f5a322dd5db2477e224821790478488173 Mon Sep 17 00:00:00 2001
From: David Herrmann <dh.herrmann@gmail.com>
Date: Sat, 21 Jan 2017 17:26:12 +0100
Subject: bpf: Add tests for the lpm trie map

The first part of this program runs randomized tests against the
lpm-bpf-map. It implements a "Trivial Longest Prefix Match" (tlpm)
based on simple, linear, single linked lists. The implementation
should be pretty straightforward.

Based on tlpm, this inserts randomized data into bpf-lpm-maps and
verifies the trie-based bpf-map implementation behaves the same way
as tlpm.

The second part uses 'real world' IPv4 and IPv6 addresses and tests
the trie with those.

Signed-off-by: David Herrmann <dh.herrmann@gmail.com>
Signed-off-by: Daniel Mack <daniel@zonque.org>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/.gitignore     |   1 +
 tools/testing/selftests/bpf/Makefile       |   4 +-
 tools/testing/selftests/bpf/test_lpm_map.c | 358 +++++++++++++++++++++++++++++
 3 files changed, 361 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/test_lpm_map.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 071431bedde8..d3b1c9bca407 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -1,3 +1,4 @@
 test_verifier
 test_maps
 test_lru_map
+test_lpm_map
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 7a5f24543a5f..064a3e5f2836 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -1,8 +1,8 @@
 CFLAGS += -Wall -O2 -I../../../../usr/include
 
-test_objs = test_verifier test_maps test_lru_map
+test_objs = test_verifier test_maps test_lru_map test_lpm_map
 
-TEST_PROGS := test_verifier test_maps test_lru_map test_kmod.sh
+TEST_PROGS := test_verifier test_maps test_lru_map test_lpm_map test_kmod.sh
 TEST_FILES := $(test_objs)
 
 all: $(test_objs)
diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c
new file mode 100644
index 000000000000..26775c00273f
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_lpm_map.c
@@ -0,0 +1,358 @@
+/*
+ * Randomized tests for eBPF longest-prefix-match maps
+ *
+ * This program runs randomized tests against the lpm-bpf-map. It implements a
+ * "Trivial Longest Prefix Match" (tlpm) based on simple, linear, singly linked
+ * lists. The implementation should be pretty straightforward.
+ *
+ * Based on tlpm, this inserts randomized data into bpf-lpm-maps and verifies
+ * the trie-based bpf-map implementation behaves the same way as tlpm.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/bpf.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include "bpf_sys.h"
+#include "bpf_util.h"
+
+struct tlpm_node {
+	struct tlpm_node *next;
+	size_t n_bits;
+	uint8_t key[];
+};
+
+static struct tlpm_node *tlpm_add(struct tlpm_node *list,
+				  const uint8_t *key,
+				  size_t n_bits)
+{
+	struct tlpm_node *node;
+	size_t n;
+
+	/* add new entry with @key/@n_bits to @list and return new head */
+
+	n = (n_bits + 7) / 8;
+	node = malloc(sizeof(*node) + n);
+	assert(node);
+
+	node->next = list;
+	node->n_bits = n_bits;
+	memcpy(node->key, key, n);
+
+	return node;
+}
+
+static void tlpm_clear(struct tlpm_node *list)
+{
+	struct tlpm_node *node;
+
+	/* free all entries in @list */
+
+	while ((node = list)) {
+		list = list->next;
+		free(node);
+	}
+}
+
+static struct tlpm_node *tlpm_match(struct tlpm_node *list,
+				    const uint8_t *key,
+				    size_t n_bits)
+{
+	struct tlpm_node *best = NULL;
+	size_t i;
+
+	/* Perform longest prefix-match on @key/@n_bits. That is, iterate all
+	 * entries and match each prefix against @key. Remember the "best"
+	 * entry we find (i.e., the longest prefix that matches) and return it
+	 * to the caller when done.
+	 */
+
+	for ( ; list; list = list->next) {
+		for (i = 0; i < n_bits && i < list->n_bits; ++i) {
+			if ((key[i / 8] & (1 << (7 - i % 8))) !=
+			    (list->key[i / 8] & (1 << (7 - i % 8))))
+				break;
+		}
+
+		if (i >= list->n_bits) {
+			if (!best || i > best->n_bits)
+				best = list;
+		}
+	}
+
+	return best;
+}
+
+static void test_lpm_basic(void)
+{
+	struct tlpm_node *list = NULL, *t1, *t2;
+
+	/* very basic, static tests to verify tlpm works as expected */
+
+	assert(!tlpm_match(list, (uint8_t[]){ 0xff }, 8));
+
+	t1 = list = tlpm_add(list, (uint8_t[]){ 0xff }, 8);
+	assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff }, 8));
+	assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 16));
+	assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0x00 }, 16));
+	assert(!tlpm_match(list, (uint8_t[]){ 0x7f }, 8));
+	assert(!tlpm_match(list, (uint8_t[]){ 0xfe }, 8));
+	assert(!tlpm_match(list, (uint8_t[]){ 0xff }, 7));
+
+	t2 = list = tlpm_add(list, (uint8_t[]){ 0xff, 0xff }, 16);
+	assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff }, 8));
+	assert(t2 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 16));
+	assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 15));
+	assert(!tlpm_match(list, (uint8_t[]){ 0x7f, 0xff }, 16));
+
+	tlpm_clear(list);
+}
+
+static void test_lpm_order(void)
+{
+	struct tlpm_node *t1, *t2, *l1 = NULL, *l2 = NULL;
+	size_t i, j;
+
+	/* Verify the tlpm implementation works correctly regardless of the
+	 * order of entries. Insert a random set of entries into @l1, and copy
+	 * the same data in reverse order into @l2. Then verify a lookup of
+	 * random keys will yield the same result in both sets.
+	 */
+
+	for (i = 0; i < (1 << 12); ++i)
+		l1 = tlpm_add(l1, (uint8_t[]){
+					rand() % 0xff,
+					rand() % 0xff,
+				}, rand() % 16 + 1);
+
+	for (t1 = l1; t1; t1 = t1->next)
+		l2 = tlpm_add(l2, t1->key, t1->n_bits);
+
+	for (i = 0; i < (1 << 8); ++i) {
+		uint8_t key[] = { rand() % 0xff, rand() % 0xff };
+
+		t1 = tlpm_match(l1, key, 16);
+		t2 = tlpm_match(l2, key, 16);
+
+		assert(!t1 == !t2);
+		if (t1) {
+			assert(t1->n_bits == t2->n_bits);
+			for (j = 0; j < t1->n_bits; ++j)
+				assert((t1->key[j / 8] & (1 << (7 - j % 8))) ==
+				       (t2->key[j / 8] & (1 << (7 - j % 8))));
+		}
+	}
+
+	tlpm_clear(l1);
+	tlpm_clear(l2);
+}
+
+static void test_lpm_map(int keysize)
+{
+	size_t i, j, n_matches, n_nodes, n_lookups;
+	struct tlpm_node *t, *list = NULL;
+	struct bpf_lpm_trie_key *key;
+	uint8_t *data, *value;
+	int r, map;
+
+	/* Compare behavior of tlpm vs. bpf-lpm. Create a randomized set of
+	 * prefixes and insert it into both tlpm and bpf-lpm. Then run some
+	 * randomized lookups and verify both maps return the same result.
+	 */
+
+	n_matches = 0;
+	n_nodes = 1 << 8;
+	n_lookups = 1 << 16;
+
+	data = alloca(keysize);
+	memset(data, 0, keysize);
+
+	value = alloca(keysize + 1);
+	memset(value, 0, keysize + 1);
+
+	key = alloca(sizeof(*key) + keysize);
+	memset(key, 0, sizeof(*key) + keysize);
+
+	map = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE,
+			     sizeof(*key) + keysize,
+			     keysize + 1,
+			     4096,
+			     BPF_F_NO_PREALLOC);
+	assert(map >= 0);
+
+	for (i = 0; i < n_nodes; ++i) {
+		for (j = 0; j < keysize; ++j)
+			value[j] = rand() & 0xff;
+		value[keysize] = rand() % (8 * keysize + 1);
+
+		list = tlpm_add(list, value, value[keysize]);
+
+		key->prefixlen = value[keysize];
+		memcpy(key->data, value, keysize);
+		r = bpf_map_update(map, key, value, 0);
+		assert(!r);
+	}
+
+	for (i = 0; i < n_lookups; ++i) {
+		for (j = 0; j < keysize; ++j)
+			data[j] = rand() & 0xff;
+
+		t = tlpm_match(list, data, 8 * keysize);
+
+		key->prefixlen = 8 * keysize;
+		memcpy(key->data, data, keysize);
+		r = bpf_map_lookup(map, key, value);
+		assert(!r || errno == ENOENT);
+		assert(!t == !!r);
+
+		if (t) {
+			++n_matches;
+			assert(t->n_bits == value[keysize]);
+			for (j = 0; j < t->n_bits; ++j)
+				assert((t->key[j / 8] & (1 << (7 - j % 8))) ==
+				       (value[j / 8] & (1 << (7 - j % 8))));
+		}
+	}
+
+	close(map);
+	tlpm_clear(list);
+
+	/* With 255 random nodes in the map, we are pretty likely to match
+	 * something on every lookup. For statistics, use this:
+	 *
+	 *     printf("  nodes: %zu\n"
+	 *            "lookups: %zu\n"
+	 *            "matches: %zu\n", n_nodes, n_lookups, n_matches);
+	 */
+}
+
+/* Test the implementation with some 'real world' examples */
+
+static void test_lpm_ipaddr(void)
+{
+	struct bpf_lpm_trie_key *key_ipv4;
+	struct bpf_lpm_trie_key *key_ipv6;
+	size_t key_size_ipv4;
+	size_t key_size_ipv6;
+	int map_fd_ipv4;
+	int map_fd_ipv6;
+	__u64 value;
+
+	key_size_ipv4 = sizeof(*key_ipv4) + sizeof(__u32);
+	key_size_ipv6 = sizeof(*key_ipv6) + sizeof(__u32) * 4;
+	key_ipv4 = alloca(key_size_ipv4);
+	key_ipv6 = alloca(key_size_ipv6);
+
+	map_fd_ipv4 = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE,
+				     key_size_ipv4, sizeof(value),
+				     100, BPF_F_NO_PREALLOC);
+	assert(map_fd_ipv4 >= 0);
+
+	map_fd_ipv6 = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE,
+				     key_size_ipv6, sizeof(value),
+				     100, BPF_F_NO_PREALLOC);
+	assert(map_fd_ipv6 >= 0);
+
+	/* Fill data some IPv4 and IPv6 address ranges */
+	value = 1;
+	key_ipv4->prefixlen = 16;
+	inet_pton(AF_INET, "192.168.0.0", key_ipv4->data);
+	assert(bpf_map_update(map_fd_ipv4, key_ipv4, &value, 0) == 0);
+
+	value = 2;
+	key_ipv4->prefixlen = 24;
+	inet_pton(AF_INET, "192.168.0.0", key_ipv4->data);
+	assert(bpf_map_update(map_fd_ipv4, key_ipv4, &value, 0) == 0);
+
+	value = 3;
+	key_ipv4->prefixlen = 24;
+	inet_pton(AF_INET, "192.168.128.0", key_ipv4->data);
+	assert(bpf_map_update(map_fd_ipv4, key_ipv4, &value, 0) == 0);
+
+	value = 5;
+	key_ipv4->prefixlen = 24;
+	inet_pton(AF_INET, "192.168.1.0", key_ipv4->data);
+	assert(bpf_map_update(map_fd_ipv4, key_ipv4, &value, 0) == 0);
+
+	value = 4;
+	key_ipv4->prefixlen = 23;
+	inet_pton(AF_INET, "192.168.0.0", key_ipv4->data);
+	assert(bpf_map_update(map_fd_ipv4, key_ipv4, &value, 0) == 0);
+
+	value = 0xdeadbeef;
+	key_ipv6->prefixlen = 64;
+	inet_pton(AF_INET6, "2a00:1450:4001:814::200e", key_ipv6->data);
+	assert(bpf_map_update(map_fd_ipv6, key_ipv6, &value, 0) == 0);
+
+	/* Set tprefixlen to maximum for lookups */
+	key_ipv4->prefixlen = 32;
+	key_ipv6->prefixlen = 128;
+
+	/* Test some lookups that should come back with a value */
+	inet_pton(AF_INET, "192.168.128.23", key_ipv4->data);
+	assert(bpf_map_lookup(map_fd_ipv4, key_ipv4, &value) == 0);
+	assert(value == 3);
+
+	inet_pton(AF_INET, "192.168.0.1", key_ipv4->data);
+	assert(bpf_map_lookup(map_fd_ipv4, key_ipv4, &value) == 0);
+	assert(value == 2);
+
+	inet_pton(AF_INET6, "2a00:1450:4001:814::", key_ipv6->data);
+	assert(bpf_map_lookup(map_fd_ipv6, key_ipv6, &value) == 0);
+	assert(value == 0xdeadbeef);
+
+	inet_pton(AF_INET6, "2a00:1450:4001:814::1", key_ipv6->data);
+	assert(bpf_map_lookup(map_fd_ipv6, key_ipv6, &value) == 0);
+	assert(value == 0xdeadbeef);
+
+	/* Test some lookups that should not match any entry */
+	inet_pton(AF_INET, "10.0.0.1", key_ipv4->data);
+	assert(bpf_map_lookup(map_fd_ipv4, key_ipv4, &value) == -1 &&
+	       errno == ENOENT);
+
+	inet_pton(AF_INET, "11.11.11.11", key_ipv4->data);
+	assert(bpf_map_lookup(map_fd_ipv4, key_ipv4, &value) == -1 &&
+	       errno == ENOENT);
+
+	inet_pton(AF_INET6, "2a00:ffff::", key_ipv6->data);
+	assert(bpf_map_lookup(map_fd_ipv6, key_ipv6, &value) == -1 &&
+	       errno == ENOENT);
+
+	close(map_fd_ipv4);
+	close(map_fd_ipv6);
+}
+
+int main(void)
+{
+	struct rlimit limit  = { RLIM_INFINITY, RLIM_INFINITY };
+	int i, ret;
+
+	/* we want predictable, pseudo random tests */
+	srand(0xf00ba1);
+
+	/* allow unlimited locked memory */
+	ret = setrlimit(RLIMIT_MEMLOCK, &limit);
+	if (ret < 0)
+		perror("Unable to lift memlock rlimit");
+
+	test_lpm_basic();
+	test_lpm_order();
+
+	/* Test with 8, 16, 24, 32, ... 128 bit prefix length */
+	for (i = 1; i <= 16; ++i)
+		test_lpm_map(i);
+
+	test_lpm_ipaddr();
+
+	printf("test_lpm: OK\n");
+	return 0;
+}
-- 
cgit v1.2.3


From 62b64660262ab512cb428c9dd6e5a5586a0beb53 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 24 Jan 2017 01:06:29 +0100
Subject: bpf: add prog tag test case to bpf selftests

Add the test case used to compare the results from fdinfo with
af_alg's output on the tag. Tests are from min to max sized
programs, with and without maps included.

  # ./test_tag
  test_tag: OK (40945 tests)

Tested on x86_64 and s390x.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/Makefile   |   4 +-
 tools/testing/selftests/bpf/test_tag.c | 202 +++++++++++++++++++++++++++++++++
 2 files changed, 204 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/test_tag.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 064a3e5f2836..769a6cb42b4b 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -1,8 +1,8 @@
 CFLAGS += -Wall -O2 -I../../../../usr/include
 
-test_objs = test_verifier test_maps test_lru_map test_lpm_map
+test_objs = test_verifier test_tag test_maps test_lru_map test_lpm_map
 
-TEST_PROGS := test_verifier test_maps test_lru_map test_lpm_map test_kmod.sh
+TEST_PROGS := $(test_objs) test_kmod.sh
 TEST_FILES := $(test_objs)
 
 all: $(test_objs)
diff --git a/tools/testing/selftests/bpf/test_tag.c b/tools/testing/selftests/bpf/test_tag.c
new file mode 100644
index 000000000000..6ab4793b3d16
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tag.c
@@ -0,0 +1,202 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <time.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+#include <sched.h>
+#include <limits.h>
+#include <assert.h>
+
+#include <sys/socket.h>
+#include <sys/resource.h>
+
+#include <linux/filter.h>
+#include <linux/bpf.h>
+#include <linux/if_alg.h>
+
+#include "../../../include/linux/filter.h"
+
+#include "bpf_sys.h"
+
+static struct bpf_insn prog[BPF_MAXINSNS];
+
+static void bpf_gen_imm_prog(unsigned int insns, int fd_map)
+{
+	int i;
+
+	srand(time(NULL));
+	for (i = 0; i < insns; i++)
+		prog[i] = BPF_ALU64_IMM(BPF_MOV, i % BPF_REG_10, rand());
+	prog[i - 1] = BPF_EXIT_INSN();
+}
+
+static void bpf_gen_map_prog(unsigned int insns, int fd_map)
+{
+	int i, j = 0;
+
+	for (i = 0; i + 1 < insns; i += 2) {
+		struct bpf_insn tmp[] = {
+			BPF_LD_MAP_FD(j++ % BPF_REG_10, fd_map)
+		};
+
+		memcpy(&prog[i], tmp, sizeof(tmp));
+	}
+	if (insns % 2 == 0)
+		prog[insns - 2] = BPF_ALU64_IMM(BPF_MOV, i % BPF_REG_10, 42);
+	prog[insns - 1] = BPF_EXIT_INSN();
+}
+
+static int bpf_try_load_prog(int insns, int fd_map,
+			     void (*bpf_filler)(unsigned int insns,
+						int fd_map))
+{
+	int fd_prog;
+
+	bpf_filler(insns, fd_map);
+	fd_prog = bpf_prog_load(BPF_PROG_TYPE_SCHED_CLS, prog, insns *
+				sizeof(struct bpf_insn), "", NULL, 0);
+	assert(fd_prog > 0);
+	if (fd_map > 0)
+		bpf_filler(insns, 0);
+	return fd_prog;
+}
+
+static int __hex2bin(char ch)
+{
+	if ((ch >= '0') && (ch <= '9'))
+		return ch - '0';
+	ch = tolower(ch);
+	if ((ch >= 'a') && (ch <= 'f'))
+		return ch - 'a' + 10;
+	return -1;
+}
+
+static int hex2bin(uint8_t *dst, const char *src, size_t count)
+{
+	while (count--) {
+		int hi = __hex2bin(*src++);
+		int lo = __hex2bin(*src++);
+
+		if ((hi < 0) || (lo < 0))
+			return -1;
+		*dst++ = (hi << 4) | lo;
+	}
+	return 0;
+}
+
+static void tag_from_fdinfo(int fd_prog, uint8_t *tag, uint32_t len)
+{
+	const int prefix_len = sizeof("prog_tag:\t") - 1;
+	char buff[256];
+	int ret = -1;
+	FILE *fp;
+
+	snprintf(buff, sizeof(buff), "/proc/%d/fdinfo/%d", getpid(),
+		 fd_prog);
+	fp = fopen(buff, "r");
+	assert(fp);
+
+	while (fgets(buff, sizeof(buff), fp)) {
+		if (strncmp(buff, "prog_tag:\t", len))
+			continue;
+		ret = hex2bin(tag, buff + prefix_len, len);
+		break;
+	}
+
+	fclose(fp);
+	assert(!ret);
+}
+
+static void tag_from_alg(int insns, uint8_t *tag, uint32_t len)
+{
+	static const struct sockaddr_alg alg = {
+		.salg_family	= AF_ALG,
+		.salg_type	= "hash",
+		.salg_name	= "sha1",
+	};
+	int fd_base, fd_alg, ret;
+	ssize_t size;
+
+	fd_base = socket(AF_ALG, SOCK_SEQPACKET, 0);
+	assert(fd_base > 0);
+
+	ret = bind(fd_base, (struct sockaddr *)&alg, sizeof(alg));
+	assert(!ret);
+
+	fd_alg = accept(fd_base, NULL, 0);
+	assert(fd_alg > 0);
+
+	insns *= sizeof(struct bpf_insn);
+	size = write(fd_alg, prog, insns);
+	assert(size == insns);
+
+	size = read(fd_alg, tag, len);
+	assert(size == len);
+
+	close(fd_alg);
+	close(fd_base);
+}
+
+static void tag_dump(const char *prefix, uint8_t *tag, uint32_t len)
+{
+	int i;
+
+	printf("%s", prefix);
+	for (i = 0; i < len; i++)
+		printf("%02x", tag[i]);
+	printf("\n");
+}
+
+static void tag_exit_report(int insns, int fd_map, uint8_t *ftag,
+			    uint8_t *atag, uint32_t len)
+{
+	printf("Program tag mismatch for %d insns%s!\n", insns,
+	       fd_map < 0 ? "" : " with map");
+
+	tag_dump("  fdinfo result: ", ftag, len);
+	tag_dump("  af_alg result: ", atag, len);
+	exit(1);
+}
+
+static void do_test(uint32_t *tests, int start_insns, int fd_map,
+		    void (*bpf_filler)(unsigned int insns, int fd))
+{
+	int i, fd_prog;
+
+	for (i = start_insns; i <= BPF_MAXINSNS; i++) {
+		uint8_t ftag[8], atag[sizeof(ftag)];
+
+		fd_prog = bpf_try_load_prog(i, fd_map, bpf_filler);
+		tag_from_fdinfo(fd_prog, ftag, sizeof(ftag));
+		tag_from_alg(i, atag, sizeof(atag));
+		if (memcmp(ftag, atag, sizeof(ftag)))
+			tag_exit_report(i, fd_map, ftag, atag, sizeof(ftag));
+
+		close(fd_prog);
+		sched_yield();
+		(*tests)++;
+	}
+}
+
+int main(void)
+{
+	struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
+	uint32_t tests = 0;
+	int i, fd_map;
+
+	setrlimit(RLIMIT_MEMLOCK, &rinf);
+	fd_map = bpf_map_create(BPF_MAP_TYPE_HASH, sizeof(int),
+				sizeof(int), 1, BPF_F_NO_PREALLOC);
+	assert(fd_map > 0);
+
+	for (i = 0; i < 5; i++) {
+		do_test(&tests, 2, -1,     bpf_gen_imm_prog);
+		do_test(&tests, 3, fd_map, bpf_gen_map_prog);
+	}
+
+	printf("test_tag: OK (%u tests)\n", tests);
+	close(fd_map);
+	return 0;
+}
-- 
cgit v1.2.3


From 3fadc80115837b86f989d17c4aa92bb5cb7bc1b6 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 24 Jan 2017 01:06:30 +0100
Subject: bpf: enable verifier to better track const alu ops

William reported couple of issues in relation to direct packet
access. Typical scheme is to check for data + [off] <= data_end,
where [off] can be either immediate or coming from a tracked
register that contains an immediate, depending on the branch, we
can then access the data. However, in case of calculating [off]
for either the mentioned test itself or for access after the test
in a more "complex" way, then the verifier will stop tracking the
CONST_IMM marked register and will mark it as UNKNOWN_VALUE one.

Adding that UNKNOWN_VALUE typed register to a pkt() marked
register, the verifier then bails out in check_packet_ptr_add()
as it finds the registers imm value below 48. In the first below
example, that is due to evaluate_reg_imm_alu() not handling right
shifts and thus marking the register as UNKNOWN_VALUE via helper
__mark_reg_unknown_value() that resets imm to 0.

In the second case the same happens at the time when r4 is set
to r4 &= r5, where it transitions to UNKNOWN_VALUE from
evaluate_reg_imm_alu(). Later on r4 we shift right by 3 inside
evaluate_reg_alu(), where the register's imm turns into 3. That
is, for registers with type UNKNOWN_VALUE, imm of 0 means that
we don't know what value the register has, and for imm > 0 it
means that the value has [imm] upper zero bits. F.e. when shifting
an UNKNOWN_VALUE register by 3 to the right, no matter what value
it had, we know that the 3 upper most bits must be zero now.
This is to make sure that ALU operations with unknown registers
don't overflow. Meaning, once we know that we have more than 48
upper zero bits, or, in other words cannot go beyond 0xffff offset
with ALU ops, such an addition will track the target register
as a new pkt() register with a new id, but 0 offset and 0 range,
so for that a new data/data_end test will be required. Is the source
register a CONST_IMM one that is to be added to the pkt() register,
or the source instruction is an add instruction with immediate
value, then it will get added if it stays within max 0xffff bounds.
>From there, pkt() type, can be accessed should reg->off + imm be
within the access range of pkt().

  [...]
  from 28 to 30: R0=imm1,min_value=1,max_value=1
    R1=pkt(id=0,off=0,r=22) R2=pkt_end
    R3=imm144,min_value=144,max_value=144
    R4=imm0,min_value=0,max_value=0
    R5=inv48,min_value=2054,max_value=2054 R10=fp
  30: (bf) r5 = r3
  31: (07) r5 += 23
  32: (77) r5 >>= 3
  33: (bf) r6 = r1
  34: (0f) r6 += r5
  cannot add integer value with 0 upper zero bits to ptr_to_packet

  [...]
  from 52 to 80: R0=imm1,min_value=1,max_value=1
    R1=pkt(id=0,off=0,r=34) R2=pkt_end R3=inv
    R4=imm272 R5=inv56,min_value=17,max_value=17
    R6=pkt(id=0,off=26,r=34) R10=fp
  80: (07) r4 += 71
  81: (18) r5 = 0xfffffff8
  83: (5f) r4 &= r5
  84: (77) r4 >>= 3
  85: (0f) r1 += r4
  cannot add integer value with 3 upper zero bits to ptr_to_packet

Thus to get above use-cases working, evaluate_reg_imm_alu() has
been extended for further ALU ops. This is fine, because we only
operate strictly within realm of CONST_IMM types, so here we don't
care about overflows as they will happen in the simulated but also
real execution and interaction with pkt() in check_packet_ptr_add()
will check actual imm value once added to pkt(), but it's irrelevant
before.

With regards to 06c1c049721a ("bpf: allow helpers access to variable
memory") that works on UNKNOWN_VALUE registers, the verifier becomes
now a bit smarter as it can better resolve ALU ops, so we need to
adapt two test cases there, as min/max bound tracking only becomes
necessary when registers were spilled to stack. So while mask was
set before to track upper bound for UNKNOWN_VALUE case, it's now
resolved directly as CONST_IMM, and such contructs are only necessary
when f.e. registers are spilled.

For commit 6b17387307ba ("bpf: recognize 64bit immediate loads as
consts") that initially enabled dw load tracking only for nfp jit/
analyzer, I did couple of tests on large, complex programs and we
don't increase complexity badly (my tests were in ~3% range on avg).
I've added a couple of tests similar to affected code above, and
it works fine with verifier now.

Reported-by: William Tu <u9012063@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Gianluca Borello <g.borello@gmail.com>
Cc: William Tu <u9012063@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/verifier.c                       | 64 +++++++++++++++-------
 tools/testing/selftests/bpf/test_verifier.c | 82 +++++++++++++++++++++++++++++
 2 files changed, 127 insertions(+), 19 deletions(-)

(limited to 'tools/testing')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8f69df7e8167..fb3513b35c0b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1566,22 +1566,54 @@ static int evaluate_reg_imm_alu(struct bpf_verifier_env *env,
 	struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
 	struct bpf_reg_state *src_reg = &regs[insn->src_reg];
 	u8 opcode = BPF_OP(insn->code);
+	u64 dst_imm = dst_reg->imm;
 
-	/* dst_reg->type == CONST_IMM here, simulate execution of 'add'/'or'
-	 * insn. Don't care about overflow or negative values, just add them
+	/* dst_reg->type == CONST_IMM here. Simulate execution of insns
+	 * containing ALU ops. Don't care about overflow or negative
+	 * values, just add/sub/... them; registers are in u64.
 	 */
-	if (opcode == BPF_ADD && BPF_SRC(insn->code) == BPF_K)
-		dst_reg->imm += insn->imm;
-	else if (opcode == BPF_ADD && BPF_SRC(insn->code) == BPF_X &&
-		 src_reg->type == CONST_IMM)
-		dst_reg->imm += src_reg->imm;
-	else if (opcode == BPF_OR && BPF_SRC(insn->code) == BPF_K)
-		dst_reg->imm |= insn->imm;
-	else if (opcode == BPF_OR && BPF_SRC(insn->code) == BPF_X &&
-		 src_reg->type == CONST_IMM)
-		dst_reg->imm |= src_reg->imm;
-	else
+	if (opcode == BPF_ADD && BPF_SRC(insn->code) == BPF_K) {
+		dst_imm += insn->imm;
+	} else if (opcode == BPF_ADD && BPF_SRC(insn->code) == BPF_X &&
+		   src_reg->type == CONST_IMM) {
+		dst_imm += src_reg->imm;
+	} else if (opcode == BPF_SUB && BPF_SRC(insn->code) == BPF_K) {
+		dst_imm -= insn->imm;
+	} else if (opcode == BPF_SUB && BPF_SRC(insn->code) == BPF_X &&
+		   src_reg->type == CONST_IMM) {
+		dst_imm -= src_reg->imm;
+	} else if (opcode == BPF_MUL && BPF_SRC(insn->code) == BPF_K) {
+		dst_imm *= insn->imm;
+	} else if (opcode == BPF_MUL && BPF_SRC(insn->code) == BPF_X &&
+		   src_reg->type == CONST_IMM) {
+		dst_imm *= src_reg->imm;
+	} else if (opcode == BPF_OR && BPF_SRC(insn->code) == BPF_K) {
+		dst_imm |= insn->imm;
+	} else if (opcode == BPF_OR && BPF_SRC(insn->code) == BPF_X &&
+		   src_reg->type == CONST_IMM) {
+		dst_imm |= src_reg->imm;
+	} else if (opcode == BPF_AND && BPF_SRC(insn->code) == BPF_K) {
+		dst_imm &= insn->imm;
+	} else if (opcode == BPF_AND && BPF_SRC(insn->code) == BPF_X &&
+		   src_reg->type == CONST_IMM) {
+		dst_imm &= src_reg->imm;
+	} else if (opcode == BPF_RSH && BPF_SRC(insn->code) == BPF_K) {
+		dst_imm >>= insn->imm;
+	} else if (opcode == BPF_RSH && BPF_SRC(insn->code) == BPF_X &&
+		   src_reg->type == CONST_IMM) {
+		dst_imm >>= src_reg->imm;
+	} else if (opcode == BPF_LSH && BPF_SRC(insn->code) == BPF_K) {
+		dst_imm <<= insn->imm;
+	} else if (opcode == BPF_LSH && BPF_SRC(insn->code) == BPF_X &&
+		   src_reg->type == CONST_IMM) {
+		dst_imm <<= src_reg->imm;
+	} else {
 		mark_reg_unknown_value(regs, insn->dst_reg);
+		goto out;
+	}
+
+	dst_reg->imm = dst_imm;
+out:
 	return 0;
 }
 
@@ -2225,14 +2257,8 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
 		return err;
 
 	if (insn->src_reg == 0) {
-		/* generic move 64-bit immediate into a register,
-		 * only analyzer needs to collect the ld_imm value.
-		 */
 		u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
 
-		if (!env->analyzer_ops)
-			return 0;
-
 		regs[insn->dst_reg].type = CONST_IMM;
 		regs[insn->dst_reg].imm = imm;
 		return 0;
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 1aa73241c999..0d0912c7f03c 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -2325,6 +2325,84 @@ static struct bpf_test tests[] = {
 		.result = REJECT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
+	{
+		"direct packet access: test11 (shift, good access)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 8),
+			BPF_MOV64_IMM(BPF_REG_3, 144),
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 23),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_5, 3),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"direct packet access: test12 (and, good access)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 8),
+			BPF_MOV64_IMM(BPF_REG_3, 144),
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 23),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_5, 15),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"direct packet access: test13 (branches, good access)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 13),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_MOV64_IMM(BPF_REG_4, 1),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_4, 2),
+			BPF_MOV64_IMM(BPF_REG_3, 14),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_MOV64_IMM(BPF_REG_3, 24),
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 23),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_5, 15),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
 	{
 		"helper access to packet: test1, valid packet_ptr range",
 		.insns = {
@@ -4208,6 +4286,8 @@ static struct bpf_test tests[] = {
 		.insns = {
 			BPF_MOV64_IMM(BPF_REG_1, 0),
 			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
 			BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
 			BPF_MOV64_IMM(BPF_REG_3, 0),
 			BPF_MOV64_IMM(BPF_REG_4, 0),
@@ -4251,6 +4331,8 @@ static struct bpf_test tests[] = {
 			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
 			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
 			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
 			BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63),
 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
 			BPF_MOV64_IMM(BPF_REG_3, 0),
-- 
cgit v1.2.3


From afb999cdef69148f366839e74470d8f5375ba5f1 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Mon, 23 Jan 2017 08:11:07 -0800
Subject: tools: firmware: check for distro fallback udev cancel rule

Some distributions (Debian, OpenSUSE) have a udev rule in place to cancel
all fallback mechanism uevents immediately. This would obviously
make it hard to test against the fallback mechanism test interface,
so we need to check for this.

Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/testing/selftests/firmware/fw_userhelper.sh | 28 +++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/firmware/fw_userhelper.sh b/tools/testing/selftests/firmware/fw_userhelper.sh
index b9983f8e09f6..01c626a1f226 100755
--- a/tools/testing/selftests/firmware/fw_userhelper.sh
+++ b/tools/testing/selftests/firmware/fw_userhelper.sh
@@ -64,9 +64,33 @@ trap "test_finish" EXIT
 echo "ABCD0123" >"$FW"
 NAME=$(basename "$FW")
 
+DEVPATH="$DIR"/"nope-$NAME"/loading
+
 # Test failure when doing nothing (timeout works).
-echo 1 >/sys/class/firmware/timeout
-echo -n "$NAME" >"$DIR"/trigger_request
+echo -n 2 >/sys/class/firmware/timeout
+echo -n "nope-$NAME" >"$DIR"/trigger_request 2>/dev/null &
+
+# Give the kernel some time to load the loading file, must be less
+# than the timeout above.
+sleep 1
+if [ ! -f $DEVPATH ]; then
+	echo "$0: fallback mechanism immediately cancelled"
+	echo ""
+	echo "The file never appeared: $DEVPATH"
+	echo ""
+	echo "This might be a distribution udev rule setup by your distribution"
+	echo "to immediately cancel all fallback requests, this must be"
+	echo "removed before running these tests. To confirm look for"
+	echo "a firmware rule like /lib/udev/rules.d/50-firmware.rules"
+	echo "and see if you have something like this:"
+	echo ""
+	echo "SUBSYSTEM==\"firmware\", ACTION==\"add\", ATTR{loading}=\"-1\""
+	echo ""
+	echo "If you do remove this file or comment out this line before"
+	echo "proceeding with these tests."
+	exit 1
+fi
+
 if diff -q "$FW" /dev/test_firmware >/dev/null ; then
 	echo "$0: firmware was not expected to match" >&2
 	exit 1
-- 
cgit v1.2.3


From 823b0221a522bb22dcd278a06fca308a95545471 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Mon, 23 Jan 2017 08:11:08 -0800
Subject: tools: firmware: rename fallback mechanism script

Calling it user mode helper only creates confusion.

Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/testing/selftests/firmware/Makefile         |   2 +-
 tools/testing/selftests/firmware/fw_fallback.sh   | 124 ++++++++++++++++++++++
 tools/testing/selftests/firmware/fw_userhelper.sh | 123 ---------------------
 3 files changed, 125 insertions(+), 124 deletions(-)
 create mode 100755 tools/testing/selftests/firmware/fw_fallback.sh
 delete mode 100755 tools/testing/selftests/firmware/fw_userhelper.sh

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/firmware/Makefile b/tools/testing/selftests/firmware/Makefile
index 9bf82234855b..1894d625af2d 100644
--- a/tools/testing/selftests/firmware/Makefile
+++ b/tools/testing/selftests/firmware/Makefile
@@ -3,7 +3,7 @@
 # No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
 all:
 
-TEST_PROGS := fw_filesystem.sh fw_userhelper.sh
+TEST_PROGS := fw_filesystem.sh fw_fallback.sh
 
 include ../lib.mk
 
diff --git a/tools/testing/selftests/firmware/fw_fallback.sh b/tools/testing/selftests/firmware/fw_fallback.sh
new file mode 100755
index 000000000000..f694afb7d12d
--- /dev/null
+++ b/tools/testing/selftests/firmware/fw_fallback.sh
@@ -0,0 +1,124 @@
+#!/bin/sh
+# This validates that the kernel will fall back to using the fallback mechanism
+# to load firmware it can't find on disk itself. We must request a firmware
+# that the kernel won't find, and any installed helper (e.g. udev) also
+# won't find so that we can do the load ourself manually.
+set -e
+
+modprobe test_firmware
+
+DIR=/sys/devices/virtual/misc/test_firmware
+
+# CONFIG_FW_LOADER_USER_HELPER has a sysfs class under /sys/class/firmware/
+# These days no one enables CONFIG_FW_LOADER_USER_HELPER so check for that
+# as an indicator for CONFIG_FW_LOADER_USER_HELPER.
+HAS_FW_LOADER_USER_HELPER=$(if [ -d /sys/class/firmware/ ]; then echo yes; else echo no; fi)
+
+if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then
+       OLD_TIMEOUT=$(cat /sys/class/firmware/timeout)
+else
+	echo "usermode helper disabled so ignoring test"
+	exit 0
+fi
+
+FWPATH=$(mktemp -d)
+FW="$FWPATH/test-firmware.bin"
+
+test_finish()
+{
+	echo "$OLD_TIMEOUT" >/sys/class/firmware/timeout
+	rm -f "$FW"
+	rmdir "$FWPATH"
+}
+
+load_fw()
+{
+	local name="$1"
+	local file="$2"
+
+	# This will block until our load (below) has finished.
+	echo -n "$name" >"$DIR"/trigger_request &
+
+	# Give kernel a chance to react.
+	local timeout=10
+	while [ ! -e "$DIR"/"$name"/loading ]; do
+		sleep 0.1
+		timeout=$(( $timeout - 1 ))
+		if [ "$timeout" -eq 0 ]; then
+			echo "$0: firmware interface never appeared" >&2
+			exit 1
+		fi
+	done
+
+	echo 1 >"$DIR"/"$name"/loading
+	cat "$file" >"$DIR"/"$name"/data
+	echo 0 >"$DIR"/"$name"/loading
+
+	# Wait for request to finish.
+	wait
+}
+
+trap "test_finish" EXIT
+
+# This is an unlikely real-world firmware content. :)
+echo "ABCD0123" >"$FW"
+NAME=$(basename "$FW")
+
+DEVPATH="$DIR"/"nope-$NAME"/loading
+
+# Test failure when doing nothing (timeout works).
+echo -n 2 >/sys/class/firmware/timeout
+echo -n "nope-$NAME" >"$DIR"/trigger_request 2>/dev/null &
+
+# Give the kernel some time to load the loading file, must be less
+# than the timeout above.
+sleep 1
+if [ ! -f $DEVPATH ]; then
+	echo "$0: fallback mechanism immediately cancelled"
+	echo ""
+	echo "The file never appeared: $DEVPATH"
+	echo ""
+	echo "This might be a distribution udev rule setup by your distribution"
+	echo "to immediately cancel all fallback requests, this must be"
+	echo "removed before running these tests. To confirm look for"
+	echo "a firmware rule like /lib/udev/rules.d/50-firmware.rules"
+	echo "and see if you have something like this:"
+	echo ""
+	echo "SUBSYSTEM==\"firmware\", ACTION==\"add\", ATTR{loading}=\"-1\""
+	echo ""
+	echo "If you do remove this file or comment out this line before"
+	echo "proceeding with these tests."
+	exit 1
+fi
+
+if diff -q "$FW" /dev/test_firmware >/dev/null ; then
+	echo "$0: firmware was not expected to match" >&2
+	exit 1
+else
+	echo "$0: timeout works"
+fi
+
+# Put timeout high enough for us to do work but not so long that failures
+# slow down this test too much.
+echo 4 >/sys/class/firmware/timeout
+
+# Load this script instead of the desired firmware.
+load_fw "$NAME" "$0"
+if diff -q "$FW" /dev/test_firmware >/dev/null ; then
+	echo "$0: firmware was not expected to match" >&2
+	exit 1
+else
+	echo "$0: firmware comparison works"
+fi
+
+# Do a proper load, which should work correctly.
+load_fw "$NAME" "$FW"
+if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then
+	echo "$0: firmware was not loaded" >&2
+	exit 1
+else
+	echo "$0: fallback mechanism works"
+
+fi
+
+exit 0
diff --git a/tools/testing/selftests/firmware/fw_userhelper.sh b/tools/testing/selftests/firmware/fw_userhelper.sh
deleted file mode 100755
index 01c626a1f226..000000000000
--- a/tools/testing/selftests/firmware/fw_userhelper.sh
+++ /dev/null
@@ -1,123 +0,0 @@
-#!/bin/sh
-# This validates that the kernel will fall back to using the user helper
-# to load firmware it can't find on disk itself. We must request a firmware
-# that the kernel won't find, and any installed helper (e.g. udev) also
-# won't find so that we can do the load ourself manually.
-set -e
-
-modprobe test_firmware
-
-DIR=/sys/devices/virtual/misc/test_firmware
-
-# CONFIG_FW_LOADER_USER_HELPER has a sysfs class under /sys/class/firmware/
-# These days no one enables CONFIG_FW_LOADER_USER_HELPER so check for that
-# as an indicator for CONFIG_FW_LOADER_USER_HELPER.
-HAS_FW_LOADER_USER_HELPER=$(if [ -d /sys/class/firmware/ ]; then echo yes; else echo no; fi)
-
-if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then
-       OLD_TIMEOUT=$(cat /sys/class/firmware/timeout)
-else
-	echo "usermode helper disabled so ignoring test"
-	exit 0
-fi
-
-FWPATH=$(mktemp -d)
-FW="$FWPATH/test-firmware.bin"
-
-test_finish()
-{
-	echo "$OLD_TIMEOUT" >/sys/class/firmware/timeout
-	rm -f "$FW"
-	rmdir "$FWPATH"
-}
-
-load_fw()
-{
-	local name="$1"
-	local file="$2"
-
-	# This will block until our load (below) has finished.
-	echo -n "$name" >"$DIR"/trigger_request &
-
-	# Give kernel a chance to react.
-	local timeout=10
-	while [ ! -e "$DIR"/"$name"/loading ]; do
-		sleep 0.1
-		timeout=$(( $timeout - 1 ))
-		if [ "$timeout" -eq 0 ]; then
-			echo "$0: firmware interface never appeared" >&2
-			exit 1
-		fi
-	done
-
-	echo 1 >"$DIR"/"$name"/loading
-	cat "$file" >"$DIR"/"$name"/data
-	echo 0 >"$DIR"/"$name"/loading
-
-	# Wait for request to finish.
-	wait
-}
-
-trap "test_finish" EXIT
-
-# This is an unlikely real-world firmware content. :)
-echo "ABCD0123" >"$FW"
-NAME=$(basename "$FW")
-
-DEVPATH="$DIR"/"nope-$NAME"/loading
-
-# Test failure when doing nothing (timeout works).
-echo -n 2 >/sys/class/firmware/timeout
-echo -n "nope-$NAME" >"$DIR"/trigger_request 2>/dev/null &
-
-# Give the kernel some time to load the loading file, must be less
-# than the timeout above.
-sleep 1
-if [ ! -f $DEVPATH ]; then
-	echo "$0: fallback mechanism immediately cancelled"
-	echo ""
-	echo "The file never appeared: $DEVPATH"
-	echo ""
-	echo "This might be a distribution udev rule setup by your distribution"
-	echo "to immediately cancel all fallback requests, this must be"
-	echo "removed before running these tests. To confirm look for"
-	echo "a firmware rule like /lib/udev/rules.d/50-firmware.rules"
-	echo "and see if you have something like this:"
-	echo ""
-	echo "SUBSYSTEM==\"firmware\", ACTION==\"add\", ATTR{loading}=\"-1\""
-	echo ""
-	echo "If you do remove this file or comment out this line before"
-	echo "proceeding with these tests."
-	exit 1
-fi
-
-if diff -q "$FW" /dev/test_firmware >/dev/null ; then
-	echo "$0: firmware was not expected to match" >&2
-	exit 1
-else
-	echo "$0: timeout works"
-fi
-
-# Put timeout high enough for us to do work but not so long that failures
-# slow down this test too much.
-echo 4 >/sys/class/firmware/timeout
-
-# Load this script instead of the desired firmware.
-load_fw "$NAME" "$0"
-if diff -q "$FW" /dev/test_firmware >/dev/null ; then
-	echo "$0: firmware was not expected to match" >&2
-	exit 1
-else
-	echo "$0: firmware comparison works"
-fi
-
-# Do a proper load, which should work correctly.
-load_fw "$NAME" "$FW"
-if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then
-	echo "$0: firmware was not loaded" >&2
-	exit 1
-else
-	echo "$0: user helper firmware loading works"
-fi
-
-exit 0
-- 
cgit v1.2.3


From eb67bc3ffd1796e198fc923da2ba15beb7965529 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Mon, 23 Jan 2017 08:11:09 -0800
Subject: tools: firmware: add fallback cancelation testing

Add a test case for cancelling the sync fallback mechanism.

Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/testing/selftests/firmware/fw_fallback.sh | 32 +++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/firmware/fw_fallback.sh b/tools/testing/selftests/firmware/fw_fallback.sh
index f694afb7d12d..68e27e5f27a4 100755
--- a/tools/testing/selftests/firmware/fw_fallback.sh
+++ b/tools/testing/selftests/firmware/fw_fallback.sh
@@ -58,6 +58,31 @@ load_fw()
 	wait
 }
 
+load_fw_cancel()
+{
+	local name="$1"
+	local file="$2"
+
+	# This will block until our load (below) has finished.
+	echo -n "$name" >"$DIR"/trigger_request 2>/dev/null &
+
+	# Give kernel a chance to react.
+	local timeout=10
+	while [ ! -e "$DIR"/"$name"/loading ]; do
+		sleep 0.1
+		timeout=$(( $timeout - 1 ))
+		if [ "$timeout" -eq 0 ]; then
+			echo "$0: firmware interface never appeared" >&2
+			exit 1
+		fi
+	done
+
+	echo -1 >"$DIR"/"$name"/loading
+
+	# Wait for request to finish.
+	wait
+}
+
 trap "test_finish" EXIT
 
 # This is an unlikely real-world firmware content. :)
@@ -118,7 +143,14 @@ if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then
 	exit 1
 else
 	echo "$0: fallback mechanism works"
+fi
 
+load_fw_cancel "nope-$NAME" "$FW"
+if diff -q "$FW" /dev/test_firmware >/dev/null ; then
+	echo "$0: firmware was expected to be cancelled" >&2
+	exit 1
+else
+	echo "$0: cancelling fallback mechanism works"
 fi
 
 exit 0
-- 
cgit v1.2.3


From 061132d2b9c9504a9f314dcd73f6483a7d8cd1e8 Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@kernel.org>
Date: Mon, 23 Jan 2017 08:11:10 -0800
Subject: test_firmware: add test custom fallback trigger

We have no custom fallback mechanism test interface. Provide one.
This tests both the custom fallback mechanism and cancelling the
it.

Signed-off-by: Luis R. Rodriguez <mcgrof@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 lib/test_firmware.c                             | 45 ++++++++++++++++
 tools/testing/selftests/firmware/fw_fallback.sh | 68 +++++++++++++++++++++++++
 2 files changed, 113 insertions(+)

(limited to 'tools/testing')

diff --git a/lib/test_firmware.c b/lib/test_firmware.c
index 38cc188c4d3c..09371b0a9baf 100644
--- a/lib/test_firmware.c
+++ b/lib/test_firmware.c
@@ -126,11 +126,56 @@ out:
 }
 static DEVICE_ATTR_WO(trigger_async_request);
 
+static ssize_t trigger_custom_fallback_store(struct device *dev,
+					     struct device_attribute *attr,
+					     const char *buf, size_t count)
+{
+	int rc;
+	char *name;
+
+	name = kstrndup(buf, count, GFP_KERNEL);
+	if (!name)
+		return -ENOSPC;
+
+	pr_info("loading '%s' using custom fallback mechanism\n", name);
+
+	mutex_lock(&test_fw_mutex);
+	release_firmware(test_firmware);
+	test_firmware = NULL;
+	rc = request_firmware_nowait(THIS_MODULE, FW_ACTION_NOHOTPLUG, name,
+				     dev, GFP_KERNEL, NULL,
+				     trigger_async_request_cb);
+	if (rc) {
+		pr_info("async load of '%s' failed: %d\n", name, rc);
+		kfree(name);
+		goto out;
+	}
+	/* Free 'name' ASAP, to test for race conditions */
+	kfree(name);
+
+	wait_for_completion(&async_fw_done);
+
+	if (test_firmware) {
+		pr_info("loaded: %zu\n", test_firmware->size);
+		rc = count;
+	} else {
+		pr_err("failed to async load firmware\n");
+		rc = -ENODEV;
+	}
+
+out:
+	mutex_unlock(&test_fw_mutex);
+
+	return rc;
+}
+static DEVICE_ATTR_WO(trigger_custom_fallback);
+
 #define TEST_FW_DEV_ATTR(name)          &dev_attr_##name.attr
 
 static struct attribute *test_dev_attrs[] = {
 	TEST_FW_DEV_ATTR(trigger_request),
 	TEST_FW_DEV_ATTR(trigger_async_request),
+	TEST_FW_DEV_ATTR(trigger_custom_fallback),
 	NULL,
 };
 
diff --git a/tools/testing/selftests/firmware/fw_fallback.sh b/tools/testing/selftests/firmware/fw_fallback.sh
index 68e27e5f27a4..2e4c22d5abf7 100755
--- a/tools/testing/selftests/firmware/fw_fallback.sh
+++ b/tools/testing/selftests/firmware/fw_fallback.sh
@@ -83,6 +83,58 @@ load_fw_cancel()
 	wait
 }
 
+load_fw_custom()
+{
+	local name="$1"
+	local file="$2"
+
+	echo -n "$name" >"$DIR"/trigger_custom_fallback 2>/dev/null &
+
+	# Give kernel a chance to react.
+	local timeout=10
+	while [ ! -e "$DIR"/"$name"/loading ]; do
+		sleep 0.1
+		timeout=$(( $timeout - 1 ))
+		if [ "$timeout" -eq 0 ]; then
+			echo "$0: firmware interface never appeared" >&2
+			exit 1
+		fi
+	done
+
+	echo 1 >"$DIR"/"$name"/loading
+	cat "$file" >"$DIR"/"$name"/data
+	echo 0 >"$DIR"/"$name"/loading
+
+	# Wait for request to finish.
+	wait
+}
+
+
+load_fw_custom_cancel()
+{
+	local name="$1"
+	local file="$2"
+
+	echo -n "$name" >"$DIR"/trigger_custom_fallback 2>/dev/null &
+
+	# Give kernel a chance to react.
+	local timeout=10
+	while [ ! -e "$DIR"/"$name"/loading ]; do
+		sleep 0.1
+		timeout=$(( $timeout - 1 ))
+		if [ "$timeout" -eq 0 ]; then
+			echo "$0: firmware interface never appeared" >&2
+			exit 1
+		fi
+	done
+
+	echo -1 >"$DIR"/"$name"/loading
+
+	# Wait for request to finish.
+	wait
+}
+
+
 trap "test_finish" EXIT
 
 # This is an unlikely real-world firmware content. :)
@@ -153,4 +205,20 @@ else
 	echo "$0: cancelling fallback mechanism works"
 fi
 
+load_fw_custom "$NAME" "$FW"
+if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then
+	echo "$0: firmware was not loaded" >&2
+	exit 1
+else
+	echo "$0: custom fallback loading mechanism works"
+fi
+
+load_fw_custom_cancel "nope-$NAME" "$FW"
+if diff -q "$FW" /dev/test_firmware >/dev/null ; then
+	echo "$0: firmware was expected to be cancelled" >&2
+	exit 1
+else
+	echo "$0: cancelling custom fallback mechanism works"
+fi
+
 exit 0
-- 
cgit v1.2.3


From 418b2977b34378f67c46930c72a776f94e7bf903 Mon Sep 17 00:00:00 2001
From: Lance Roy <ldr709@gmail.com>
Date: Sat, 31 Dec 2016 16:33:50 -0800
Subject: rcutorture: Add CBMC-based formal verification for SRCU

This commit creates a formal/srcu-cbmc directory containing scripts that
pull SRCU in from the source code, filter it to remove things that CBMC
cannot handle, and run a series of verifications on it.  This has a number
of shortcomings:

1.	It does not yet hook into the upper-level self-test Makefiles.
2.	It tests only a single scenario, store buffering.
3.	There is no gcc-based syntax-error prefiltering.

Nevertheless, it does fully verify a piece of SRCU under a moderately
weak memory model (PSO).

Signed-off-by: Lance Roy <ldr709@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 .../rcutorture/formal/srcu-cbmc/.gitignore         |   1 +
 .../selftests/rcutorture/formal/srcu-cbmc/Makefile |  16 +
 .../formal/srcu-cbmc/empty_includes/linux/delay.h  |   0
 .../formal/srcu-cbmc/empty_includes/linux/export.h |   0
 .../formal/srcu-cbmc/empty_includes/linux/mutex.h  |   0
 .../formal/srcu-cbmc/empty_includes/linux/percpu.h |   0
 .../srcu-cbmc/empty_includes/linux/preempt.h       |   0
 .../srcu-cbmc/empty_includes/linux/rcupdate.h      |   0
 .../formal/srcu-cbmc/empty_includes/linux/sched.h  |   0
 .../formal/srcu-cbmc/empty_includes/linux/smp.h    |   0
 .../srcu-cbmc/empty_includes/linux/workqueue.h     |   0
 .../srcu-cbmc/empty_includes/uapi/linux/types.h    |   0
 .../formal/srcu-cbmc/include/linux/.gitignore      |   1 +
 .../formal/srcu-cbmc/include/linux/kconfig.h       |   1 +
 .../formal/srcu-cbmc/include/linux/types.h         | 155 +++++++++
 .../rcutorture/formal/srcu-cbmc/modify_srcu.awk    | 375 +++++++++++++++++++++
 .../rcutorture/formal/srcu-cbmc/src/assume.h       |  16 +
 .../rcutorture/formal/srcu-cbmc/src/barriers.h     |  41 +++
 .../rcutorture/formal/srcu-cbmc/src/bug_on.h       |  13 +
 .../formal/srcu-cbmc/src/combined_source.c         |  13 +
 .../rcutorture/formal/srcu-cbmc/src/config.h       |  27 ++
 .../rcutorture/formal/srcu-cbmc/src/include_srcu.c |  31 ++
 .../rcutorture/formal/srcu-cbmc/src/int_typedefs.h |  33 ++
 .../rcutorture/formal/srcu-cbmc/src/locks.h        | 220 ++++++++++++
 .../rcutorture/formal/srcu-cbmc/src/misc.c         |  11 +
 .../rcutorture/formal/srcu-cbmc/src/misc.h         |  58 ++++
 .../rcutorture/formal/srcu-cbmc/src/percpu.h       |  92 +++++
 .../rcutorture/formal/srcu-cbmc/src/preempt.c      |  78 +++++
 .../rcutorture/formal/srcu-cbmc/src/preempt.h      |  58 ++++
 .../formal/srcu-cbmc/src/simple_sync_srcu.c        |  50 +++
 .../rcutorture/formal/srcu-cbmc/src/workqueues.h   | 102 ++++++
 .../srcu-cbmc/tests/store_buffering/.gitignore     |   1 +
 .../srcu-cbmc/tests/store_buffering/Makefile       |  11 +
 .../tests/store_buffering/assert_end.fail          |   1 +
 .../srcu-cbmc/tests/store_buffering/force.fail     |   1 +
 .../srcu-cbmc/tests/store_buffering/force2.fail    |   1 +
 .../srcu-cbmc/tests/store_buffering/force3.fail    |   1 +
 .../srcu-cbmc/tests/store_buffering/main.pass      |   0
 .../formal/srcu-cbmc/tests/store_buffering/test.c  |  72 ++++
 .../formal/srcu-cbmc/tests/test_script.sh          | 102 ++++++
 40 files changed, 1582 insertions(+)
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/Makefile
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/delay.h
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/export.h
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/mutex.h
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/percpu.h
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/preempt.h
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/rcupdate.h
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/sched.h
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/smp.h
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/workqueue.h
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/uapi/linux/types.h
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/kconfig.h
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h
 create mode 100755 tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/assume.h
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/barriers.h
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/bug_on.h
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/combined_source.c
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/config.h
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/include_srcu.c
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/int_typedefs.h
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/locks.h
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.c
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.h
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/percpu.h
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.c
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.h
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/simple_sync_srcu.c
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/workqueues.h
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/Makefile
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/assert_end.fail
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force.fail
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force2.fail
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force3.fail
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/main.pass
 create mode 100644 tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/test.c
 create mode 100755 tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/test_script.sh

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore
new file mode 100644
index 000000000000..712a3d41a325
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore
@@ -0,0 +1 @@
+srcu.c
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/Makefile b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/Makefile
new file mode 100644
index 000000000000..16b01559fa55
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/Makefile
@@ -0,0 +1,16 @@
+all: srcu.c store_buffering
+
+LINUX_SOURCE = ../../../../../..
+
+modified_srcu_input = $(LINUX_SOURCE)/include/linux/srcu.h \
+		      $(LINUX_SOURCE)/kernel/rcu/srcu.c
+
+modified_srcu_output = include/linux/srcu.h srcu.c
+
+include/linux/srcu.h: srcu.c
+
+srcu.c: modify_srcu.awk Makefile $(modified_srcu_input)
+	awk -f modify_srcu.awk $(modified_srcu_input) $(modified_srcu_output)
+
+store_buffering:
+	@cd tests/store_buffering; make
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/delay.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/delay.h
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/export.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/export.h
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/mutex.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/mutex.h
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/percpu.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/percpu.h
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/preempt.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/preempt.h
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/rcupdate.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/rcupdate.h
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/sched.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/sched.h
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/smp.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/smp.h
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/workqueue.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/workqueue.h
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/uapi/linux/types.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/uapi/linux/types.h
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore
new file mode 100644
index 000000000000..1d016e66980a
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore
@@ -0,0 +1 @@
+srcu.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/kconfig.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/kconfig.h
new file mode 100644
index 000000000000..f2860dd1b407
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/kconfig.h
@@ -0,0 +1 @@
+#include <LINUX_SOURCE/linux/kconfig.h>
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h
new file mode 100644
index 000000000000..4a3d538fef12
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h
@@ -0,0 +1,155 @@
+/*
+ * This header has been modifies to remove definitions of types that
+ * are defined in standard userspace headers or are problematic for some
+ * other reason.
+ */
+
+#ifndef _LINUX_TYPES_H
+#define _LINUX_TYPES_H
+
+#define __EXPORTED_HEADERS__
+#include <uapi/linux/types.h>
+
+#ifndef __ASSEMBLY__
+
+#define DECLARE_BITMAP(name, bits) \
+	unsigned long name[BITS_TO_LONGS(bits)]
+
+typedef __u32 __kernel_dev_t;
+
+/* bsd */
+typedef unsigned char		u_char;
+typedef unsigned short		u_short;
+typedef unsigned int		u_int;
+typedef unsigned long		u_long;
+
+/* sysv */
+typedef unsigned char		unchar;
+typedef unsigned short		ushort;
+typedef unsigned int		uint;
+typedef unsigned long		ulong;
+
+#ifndef __BIT_TYPES_DEFINED__
+#define __BIT_TYPES_DEFINED__
+
+typedef		__u8		u_int8_t;
+typedef		__s8		int8_t;
+typedef		__u16		u_int16_t;
+typedef		__s16		int16_t;
+typedef		__u32		u_int32_t;
+typedef		__s32		int32_t;
+
+#endif /* !(__BIT_TYPES_DEFINED__) */
+
+typedef		__u8		uint8_t;
+typedef		__u16		uint16_t;
+typedef		__u32		uint32_t;
+
+/* this is a special 64bit data type that is 8-byte aligned */
+#define aligned_u64 __u64 __attribute__((aligned(8)))
+#define aligned_be64 __be64 __attribute__((aligned(8)))
+#define aligned_le64 __le64 __attribute__((aligned(8)))
+
+/**
+ * The type used for indexing onto a disc or disc partition.
+ *
+ * Linux always considers sectors to be 512 bytes long independently
+ * of the devices real block size.
+ *
+ * blkcnt_t is the type of the inode's block count.
+ */
+#ifdef CONFIG_LBDAF
+typedef u64 sector_t;
+#else
+typedef unsigned long sector_t;
+#endif
+
+/*
+ * The type of an index into the pagecache.
+ */
+#define pgoff_t unsigned long
+
+/*
+ * A dma_addr_t can hold any valid DMA address, i.e., any address returned
+ * by the DMA API.
+ *
+ * If the DMA API only uses 32-bit addresses, dma_addr_t need only be 32
+ * bits wide.  Bus addresses, e.g., PCI BARs, may be wider than 32 bits,
+ * but drivers do memory-mapped I/O to ioremapped kernel virtual addresses,
+ * so they don't care about the size of the actual bus addresses.
+ */
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+typedef u64 dma_addr_t;
+#else
+typedef u32 dma_addr_t;
+#endif
+
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+typedef u64 phys_addr_t;
+#else
+typedef u32 phys_addr_t;
+#endif
+
+typedef phys_addr_t resource_size_t;
+
+/*
+ * This type is the placeholder for a hardware interrupt number. It has to be
+ * big enough to enclose whatever representation is used by a given platform.
+ */
+typedef unsigned long irq_hw_number_t;
+
+typedef struct {
+	int counter;
+} atomic_t;
+
+#ifdef CONFIG_64BIT
+typedef struct {
+	long counter;
+} atomic64_t;
+#endif
+
+struct list_head {
+	struct list_head *next, *prev;
+};
+
+struct hlist_head {
+	struct hlist_node *first;
+};
+
+struct hlist_node {
+	struct hlist_node *next, **pprev;
+};
+
+/**
+ * struct callback_head - callback structure for use with RCU and task_work
+ * @next: next update requests in a list
+ * @func: actual update function to call after the grace period.
+ *
+ * The struct is aligned to size of pointer. On most architectures it happens
+ * naturally due ABI requirements, but some architectures (like CRIS) have
+ * weird ABI and we need to ask it explicitly.
+ *
+ * The alignment is required to guarantee that bits 0 and 1 of @next will be
+ * clear under normal conditions -- as long as we use call_rcu(),
+ * call_rcu_bh(), call_rcu_sched(), or call_srcu() to queue callback.
+ *
+ * This guarantee is important for few reasons:
+ *  - future call_rcu_lazy() will make use of lower bits in the pointer;
+ *  - the structure shares storage spacer in struct page with @compound_head,
+ *    which encode PageTail() in bit 0. The guarantee is needed to avoid
+ *    false-positive PageTail().
+ */
+struct callback_head {
+	struct callback_head *next;
+	void (*func)(struct callback_head *head);
+} __attribute__((aligned(sizeof(void *))));
+#define rcu_head callback_head
+
+typedef void (*rcu_callback_t)(struct rcu_head *head);
+typedef void (*call_rcu_func_t)(struct rcu_head *head, rcu_callback_t func);
+
+/* clocksource cycle base type */
+typedef u64 cycle_t;
+
+#endif /*  __ASSEMBLY__ */
+#endif /* _LINUX_TYPES_H */
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk
new file mode 100755
index 000000000000..8ff89043d0a9
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk
@@ -0,0 +1,375 @@
+#!/bin/awk -f
+
+# Modify SRCU for formal verification. The first argument should be srcu.h and
+# the second should be srcu.c. Outputs modified srcu.h and srcu.c into the
+# current directory.
+
+BEGIN {
+	if (ARGC != 5) {
+		print "Usange: input.h input.c output.h output.c" > "/dev/stderr";
+		exit 1;
+	}
+	h_output = ARGV[3];
+	c_output = ARGV[4];
+	ARGC = 3;
+
+	# Tokenize using FS and not RS as FS supports regular expressions. Each
+	# record is one line of source, except that backslashed lines are
+	# combined. Comments are treated as field separators, as are quotes.
+	quote_regexp="\"([^\\\\\"]|\\\\.)*\"";
+	comment_regexp="\\/\\*([^*]|\\*+[^*/])*\\*\\/|\\/\\/.*(\n|$)";
+	FS="([ \\\\\t\n\v\f;,.=(){}+*/<>&|^-]|\\[|\\]|" comment_regexp "|" quote_regexp ")+";
+
+	inside_srcu_struct = 0;
+	inside_srcu_init_def = 0;
+	srcu_init_param_name = "";
+	in_macro = 0;
+	brace_nesting = 0;
+	paren_nesting = 0;
+
+	# Allow the manipulation of the last field separator after has been
+	# seen.
+	last_fs = "";
+	# Whether the last field separator was intended to be output.
+	last_fs_print = 0;
+
+	# rcu_batches stores the initialization for each instance of struct
+	# rcu_batch
+
+	in_comment = 0;
+
+	outputfile = "";
+}
+
+{
+	prev_outputfile = outputfile;
+	if (FILENAME ~ /\.h$/) {
+		outputfile = h_output;
+		if (FNR != NR) {
+			print "Incorrect file order" > "/dev/stderr";
+			exit 1;
+		}
+	}
+	else
+		outputfile = c_output;
+
+	if (prev_outputfile && outputfile != prev_outputfile) {
+		new_outputfile = outputfile;
+		outputfile = prev_outputfile;
+		update_fieldsep("", 0);
+		outputfile = new_outputfile;
+	}
+}
+
+# Combine the next line into $0.
+function combine_line() {
+	ret = getline next_line;
+	if (ret == 0) {
+		# Don't allow two consecutive getlines at the end of the file
+		if (eof_found) {
+			print "Error: expected more input." > "/dev/stderr";
+			exit 1;
+		} else {
+			eof_found = 1;
+		}
+	} else if (ret == -1) {
+		print "Error reading next line of file" FILENAME > "/dev/stderr";
+		exit 1;
+	}
+	$0 = $0 "\n" next_line;
+}
+
+# Combine backslashed lines and multiline comments.
+function combine_backslashes() {
+	while (/\\$|\/\*([^*]|\*+[^*\/])*\**$/) {
+		combine_line();
+	}
+}
+
+function read_line() {
+	combine_line();
+	combine_backslashes();
+}
+
+# Print out field separators and update variables that depend on them. Only
+# print if p is true. Call with sep="" and p=0 to print out the last field
+# separator.
+function update_fieldsep(sep, p) {
+	# Count braces
+	sep_tmp = sep;
+	gsub(quote_regexp "|" comment_regexp, "", sep_tmp);
+	while (1)
+	{
+		if (sub("[^{}()]*\\{", "", sep_tmp)) {
+			brace_nesting++;
+			continue;
+		}
+		if (sub("[^{}()]*\\}", "", sep_tmp)) {
+			brace_nesting--;
+			if (brace_nesting < 0) {
+				print "Unbalanced braces!" > "/dev/stderr";
+				exit 1;
+			}
+			continue;
+		}
+		if (sub("[^{}()]*\\(", "", sep_tmp)) {
+			paren_nesting++;
+			continue;
+		}
+		if (sub("[^{}()]*\\)", "", sep_tmp)) {
+			paren_nesting--;
+			if (paren_nesting < 0) {
+				print "Unbalanced parenthesis!" > "/dev/stderr";
+				exit 1;
+			}
+			continue;
+		}
+
+		break;
+	}
+
+	if (last_fs_print)
+		printf("%s", last_fs) > outputfile;
+	last_fs = sep;
+	last_fs_print = p;
+}
+
+# Shifts the fields down by n positions. Calls next if there are no more. If p
+# is true then print out field separators.
+function shift_fields(n, p) {
+	do {
+		if (match($0, FS) > 0) {
+			update_fieldsep(substr($0, RSTART, RLENGTH), p);
+			if (RSTART + RLENGTH <= length())
+				$0 = substr($0, RSTART + RLENGTH);
+			else
+				$0 = "";
+		} else {
+			update_fieldsep("", 0);
+			print "" > outputfile;
+			next;
+		}
+	} while (--n > 0);
+}
+
+# Shifts and prints the first n fields.
+function print_fields(n) {
+	do {
+		update_fieldsep("", 0);
+		printf("%s", $1) > outputfile;
+		shift_fields(1, 1);
+	} while (--n > 0);
+}
+
+{
+	combine_backslashes();
+}
+
+# Print leading FS
+{
+	if (match($0, "^(" FS ")+") > 0) {
+		update_fieldsep(substr($0, RSTART, RLENGTH), 1);
+		if (RSTART + RLENGTH <= length())
+			$0 = substr($0, RSTART + RLENGTH);
+		else
+			$0 = "";
+	}
+}
+
+# Parse the line.
+{
+	while (NF > 0) {
+		if ($1 == "struct" && NF < 3) {
+			read_line();
+			continue;
+		}
+
+		if (FILENAME ~ /\.h$/ && !inside_srcu_struct &&
+		    brace_nesting == 0 && paren_nesting == 0 &&
+		    $1 == "struct" && $2 == "srcu_struct" &&
+		    $0 ~ "^struct(" FS ")+srcu_struct(" FS ")+\\{") {
+			inside_srcu_struct = 1;
+			print_fields(2);
+			continue;
+		}
+		if (inside_srcu_struct && brace_nesting == 0 &&
+		    paren_nesting == 0) {
+			inside_srcu_struct = 0;
+			update_fieldsep("", 0);
+			for (name in rcu_batches)
+				print "extern struct rcu_batch " name ";" > outputfile;
+		}
+
+		if (inside_srcu_struct && $1 == "struct" && $2 == "rcu_batch") {
+			# Move rcu_batches outside of the struct.
+			rcu_batches[$3] = "";
+			shift_fields(3, 1);
+			sub(/;[[:space:]]*$/, "", last_fs);
+			continue;
+		}
+
+		if (FILENAME ~ /\.h$/ && !inside_srcu_init_def &&
+		    $1 == "#define" && $2 == "__SRCU_STRUCT_INIT") {
+			inside_srcu_init_def = 1;
+			srcu_init_param_name = $3;
+			in_macro = 1;
+			print_fields(3);
+			continue;
+		}
+		if (inside_srcu_init_def && brace_nesting == 0 &&
+		    paren_nesting == 0) {
+			inside_srcu_init_def = 0;
+			in_macro = 0;
+			continue;
+		}
+
+		if (inside_srcu_init_def && brace_nesting == 1 &&
+		    paren_nesting == 0 && last_fs ~ /\.[[:space:]]*$/ &&
+		    $1 ~ /^[[:alnum:]_]+$/) {
+			name = $1;
+			if (name in rcu_batches) {
+				# Remove the dot.
+				sub(/\.[[:space:]]*$/, "", last_fs);
+
+				old_record = $0;
+				do
+					shift_fields(1, 0);
+				while (last_fs !~ /,/ || paren_nesting > 0);
+				end_loc = length(old_record) - length($0);
+				end_loc += index(last_fs, ",") - length(last_fs);
+
+				last_fs = substr(last_fs, index(last_fs, ",") + 1);
+				last_fs_print = 1;
+
+				match(old_record, "^"name"("FS")+=");
+				start_loc = RSTART + RLENGTH;
+
+				len = end_loc - start_loc;
+				initializer = substr(old_record, start_loc, len);
+				gsub(srcu_init_param_name "\\.", "", initializer);
+				rcu_batches[name] = initializer;
+				continue;
+			}
+		}
+
+		# Don't include a nonexistent file
+		if (!in_macro && $1 == "#include" && /^#include[[:space:]]+"rcu\.h"/) {
+			update_fieldsep("", 0);
+			next;
+		}
+
+		# Ignore most preprocessor stuff.
+		if (!in_macro && $1 ~ /#/) {
+			break;
+		}
+
+		if (brace_nesting > 0 && $1 ~ "^[[:alnum:]_]+$" && NF < 2) {
+			read_line();
+			continue;
+		}
+		if (brace_nesting > 0 &&
+		    $0 ~ "^[[:alnum:]_]+[[:space:]]*(\\.|->)[[:space:]]*[[:alnum:]_]+" &&
+		    $2 in rcu_batches) {
+			# Make uses of rcu_batches global. Somewhat unreliable.
+			shift_fields(1, 0);
+			print_fields(1);
+			continue;
+		}
+
+		if ($1 == "static" && NF < 3) {
+			read_line();
+			continue;
+		}
+		if ($1 == "static" && ($2 == "bool" && $3 == "try_check_zero" ||
+		                       $2 == "void" && $3 == "srcu_flip")) {
+			shift_fields(1, 1);
+			print_fields(2);
+			continue;
+		}
+
+		# Distinguish between read-side and write-side memory barriers.
+		if ($1 == "smp_mb" && NF < 2) {
+			read_line();
+			continue;
+		}
+		if (match($0, /^smp_mb[[:space:]();\/*]*[[:alnum:]]/)) {
+			barrier_letter = substr($0, RLENGTH, 1);
+			if (barrier_letter ~ /A|D/)
+				new_barrier_name = "sync_smp_mb";
+			else if (barrier_letter ~ /B|C/)
+				new_barrier_name = "rs_smp_mb";
+			else {
+				print "Unrecognized memory barrier." > "/dev/null";
+				exit 1;
+			}
+
+			shift_fields(1, 1);
+			printf("%s", new_barrier_name) > outputfile;
+			continue;
+		}
+
+		# Skip definition of rcu_synchronize, since it is already
+		# defined in misc.h. Only present in old versions of srcu.
+		if (brace_nesting == 0 && paren_nesting == 0 &&
+		    $1 == "struct" && $2 == "rcu_synchronize" &&
+		    $0 ~ "^struct(" FS ")+rcu_synchronize(" FS ")+\\{") {
+			shift_fields(2, 0);
+			while (brace_nesting) {
+				if (NF < 2)
+					read_line();
+				shift_fields(1, 0);
+			}
+		}
+
+		# Skip definition of wakeme_after_rcu for the same reason
+		if (brace_nesting == 0 && $1 == "static" && $2 == "void" &&
+		    $3 == "wakeme_after_rcu") {
+			while (NF < 5)
+				read_line();
+			shift_fields(3, 0);
+			do {
+				while (NF < 3)
+					read_line();
+				shift_fields(1, 0);
+			} while (paren_nesting || brace_nesting);
+		}
+
+		if ($1 ~ /^(unsigned|long)$/ && NF < 3) {
+			read_line();
+			continue;
+		}
+
+		# Give srcu_batches_completed the correct type for old SRCU.
+		if (brace_nesting == 0 && $1 == "long" &&
+		    $2 == "srcu_batches_completed") {
+			update_fieldsep("", 0);
+			printf("unsigned ") > outputfile;
+			print_fields(2);
+			continue;
+		}
+		if (brace_nesting == 0 && $1 == "unsigned" && $2 == "long" &&
+		    $3 == "srcu_batches_completed") {
+			print_fields(3);
+			continue;
+		}
+
+		# Just print out the input code by default.
+		print_fields(1);
+	}
+	update_fieldsep("", 0);
+	print > outputfile;
+	next;
+}
+
+END {
+	update_fieldsep("", 0);
+
+	if (brace_nesting != 0) {
+		print "Unbalanced braces!" > "/dev/stderr";
+		exit 1;
+	}
+
+	# Define the rcu_batches
+	for (name in rcu_batches)
+		print "struct rcu_batch " name " = " rcu_batches[name] ";" > c_output;
+}
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/assume.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/assume.h
new file mode 100644
index 000000000000..a64955447995
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/assume.h
@@ -0,0 +1,16 @@
+#ifndef ASSUME_H
+#define ASSUME_H
+
+/* Provide an assumption macro that can be disabled for gcc. */
+#ifdef RUN
+#define assume(x) \
+	do { \
+		/* Evaluate x to suppress warnings. */ \
+		(void) (x); \
+	} while (0)
+
+#else
+#define assume(x) __CPROVER_assume(x)
+#endif
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/barriers.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/barriers.h
new file mode 100644
index 000000000000..6687acc08e6d
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/barriers.h
@@ -0,0 +1,41 @@
+#ifndef BARRIERS_H
+#define BARRIERS_H
+
+#define barrier() __asm__ __volatile__("" : : : "memory")
+
+#ifdef RUN
+#define smp_mb() __sync_synchronize()
+#define smp_mb__after_unlock_lock() __sync_synchronize()
+#else
+/*
+ * Copied from CBMC's implementation of __sync_synchronize(), which
+ * seems to be disabled by default.
+ */
+#define smp_mb() __CPROVER_fence("WWfence", "RRfence", "RWfence", "WRfence", \
+				 "WWcumul", "RRcumul", "RWcumul", "WRcumul")
+#define smp_mb__after_unlock_lock() __CPROVER_fence("WWfence", "RRfence", "RWfence", "WRfence", \
+				    "WWcumul", "RRcumul", "RWcumul", "WRcumul")
+#endif
+
+/*
+ * Allow memory barriers to be disabled in either the read or write side
+ * of SRCU individually.
+ */
+
+#ifndef NO_SYNC_SMP_MB
+#define sync_smp_mb() smp_mb()
+#else
+#define sync_smp_mb() do {} while (0)
+#endif
+
+#ifndef NO_READ_SIDE_SMP_MB
+#define rs_smp_mb() smp_mb()
+#else
+#define rs_smp_mb() do {} while (0)
+#endif
+
+#define ACCESS_ONCE(x) (*(volatile typeof(x) *) &(x))
+#define READ_ONCE(x) ACCESS_ONCE(x)
+#define WRITE_ONCE(x, val) (ACCESS_ONCE(x) = (val))
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/bug_on.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/bug_on.h
new file mode 100644
index 000000000000..2a80e91f78e7
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/bug_on.h
@@ -0,0 +1,13 @@
+#ifndef BUG_ON_H
+#define BUG_ON_H
+
+#include <assert.h>
+
+#define BUG() assert(0)
+#define BUG_ON(x) assert(!(x))
+
+/* Does it make sense to treat warnings as errors? */
+#define WARN() BUG()
+#define WARN_ON(x) (BUG_ON(x), false)
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/combined_source.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/combined_source.c
new file mode 100644
index 000000000000..29eb5d2697ed
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/combined_source.c
@@ -0,0 +1,13 @@
+#include <config.h>
+
+/* Include all source files. */
+
+#include "include_srcu.c"
+
+#include "preempt.c"
+#include "misc.c"
+
+/* Used by test.c files */
+#include <pthread.h>
+#include <stdlib.h>
+#include <linux/srcu.h>
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/config.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/config.h
new file mode 100644
index 000000000000..a60038aeea7a
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/config.h
@@ -0,0 +1,27 @@
+/* "Cheater" definitions based on restricted Kconfig choices. */
+
+#undef CONFIG_TINY_RCU
+#undef __CHECKER__
+#undef CONFIG_DEBUG_LOCK_ALLOC
+#undef CONFIG_DEBUG_OBJECTS_RCU_HEAD
+#undef CONFIG_HOTPLUG_CPU
+#undef CONFIG_MODULES
+#undef CONFIG_NO_HZ_FULL_SYSIDLE
+#undef CONFIG_PREEMPT_COUNT
+#undef CONFIG_PREEMPT_RCU
+#undef CONFIG_PROVE_RCU
+#undef CONFIG_RCU_NOCB_CPU
+#undef CONFIG_RCU_NOCB_CPU_ALL
+#undef CONFIG_RCU_STALL_COMMON
+#undef CONFIG_RCU_TRACE
+#undef CONFIG_RCU_USER_QS
+#undef CONFIG_TASKS_RCU
+#define CONFIG_TREE_RCU
+
+#define CONFIG_GENERIC_ATOMIC64
+
+#if NR_CPUS > 1
+#define CONFIG_SMP
+#else
+#undef CONFIG_SMP
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/include_srcu.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/include_srcu.c
new file mode 100644
index 000000000000..5ec582a53018
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/include_srcu.c
@@ -0,0 +1,31 @@
+#include <config.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <pthread.h>
+#include <stddef.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "int_typedefs.h"
+
+#include "barriers.h"
+#include "bug_on.h"
+#include "locks.h"
+#include "misc.h"
+#include "preempt.h"
+#include "percpu.h"
+#include "workqueues.h"
+
+#ifdef USE_SIMPLE_SYNC_SRCU
+#define synchronize_srcu(sp) synchronize_srcu_original(sp)
+#endif
+
+#include <srcu.c>
+
+#ifdef USE_SIMPLE_SYNC_SRCU
+#undef synchronize_srcu
+
+#include "simple_sync_srcu.c"
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/int_typedefs.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/int_typedefs.h
new file mode 100644
index 000000000000..3aad63917858
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/int_typedefs.h
@@ -0,0 +1,33 @@
+#ifndef INT_TYPEDEFS_H
+#define INT_TYPEDEFS_H
+
+#include <inttypes.h>
+
+typedef int8_t s8;
+typedef uint8_t u8;
+typedef int16_t s16;
+typedef uint16_t u16;
+typedef int32_t s32;
+typedef uint32_t u32;
+typedef int64_t s64;
+typedef uint64_t u64;
+
+typedef int8_t __s8;
+typedef uint8_t __u8;
+typedef int16_t __s16;
+typedef uint16_t __u16;
+typedef int32_t __s32;
+typedef uint32_t __u32;
+typedef int64_t __s64;
+typedef uint64_t __u64;
+
+#define S8_C(x) INT8_C(x)
+#define U8_C(x) UINT8_C(x)
+#define S16_C(x) INT16_C(x)
+#define U16_C(x) UINT16_C(x)
+#define S32_C(x) INT32_C(x)
+#define U32_C(x) UINT32_C(x)
+#define S64_C(x) INT64_C(x)
+#define U64_C(x) UINT64_C(x)
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/locks.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/locks.h
new file mode 100644
index 000000000000..356004665576
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/locks.h
@@ -0,0 +1,220 @@
+#ifndef LOCKS_H
+#define LOCKS_H
+
+#include <limits.h>
+#include <pthread.h>
+#include <stdbool.h>
+
+#include "assume.h"
+#include "bug_on.h"
+#include "preempt.h"
+
+int nondet_int(void);
+
+#define __acquire(x)
+#define __acquires(x)
+#define __release(x)
+#define __releases(x)
+
+/* Only use one lock mechanism. Select which one. */
+#ifdef PTHREAD_LOCK
+struct lock_impl {
+	pthread_mutex_t mutex;
+};
+
+static inline void lock_impl_lock(struct lock_impl *lock)
+{
+	BUG_ON(pthread_mutex_lock(&lock->mutex));
+}
+
+static inline void lock_impl_unlock(struct lock_impl *lock)
+{
+	BUG_ON(pthread_mutex_unlock(&lock->mutex));
+}
+
+static inline bool lock_impl_trylock(struct lock_impl *lock)
+{
+	int err = pthread_mutex_trylock(&lock->mutex);
+
+	if (!err)
+		return true;
+	else if (err == EBUSY)
+		return false;
+	BUG();
+}
+
+static inline void lock_impl_init(struct lock_impl *lock)
+{
+	pthread_mutex_init(&lock->mutex, NULL);
+}
+
+#define LOCK_IMPL_INITIALIZER {.mutex = PTHREAD_MUTEX_INITIALIZER}
+
+#else /* !defined(PTHREAD_LOCK) */
+/* Spinlock that assumes that it always gets the lock immediately. */
+
+struct lock_impl {
+	bool locked;
+};
+
+static inline bool lock_impl_trylock(struct lock_impl *lock)
+{
+#ifdef RUN
+	/* TODO: Should this be a test and set? */
+	return __sync_bool_compare_and_swap(&lock->locked, false, true);
+#else
+	__CPROVER_atomic_begin();
+	bool old_locked = lock->locked;
+	lock->locked = true;
+	__CPROVER_atomic_end();
+
+	/* Minimal barrier to prevent accesses leaking out of lock. */
+	__CPROVER_fence("RRfence", "RWfence");
+
+	return !old_locked;
+#endif
+}
+
+static inline void lock_impl_lock(struct lock_impl *lock)
+{
+	/*
+	 * CBMC doesn't support busy waiting, so just assume that the
+	 * lock is available.
+	 */
+	assume(lock_impl_trylock(lock));
+
+	/*
+	 * If the lock was already held by this thread then the assumption
+	 * is unsatisfiable (deadlock).
+	 */
+}
+
+static inline void lock_impl_unlock(struct lock_impl *lock)
+{
+#ifdef RUN
+	BUG_ON(!__sync_bool_compare_and_swap(&lock->locked, true, false));
+#else
+	/* Minimal barrier to prevent accesses leaking out of lock. */
+	__CPROVER_fence("RWfence", "WWfence");
+
+	__CPROVER_atomic_begin();
+	bool old_locked = lock->locked;
+	lock->locked = false;
+	__CPROVER_atomic_end();
+
+	BUG_ON(!old_locked);
+#endif
+}
+
+static inline void lock_impl_init(struct lock_impl *lock)
+{
+	lock->locked = false;
+}
+
+#define LOCK_IMPL_INITIALIZER {.locked = false}
+
+#endif /* !defined(PTHREAD_LOCK) */
+
+/*
+ * Implement spinlocks using the lock mechanism. Wrap the lock to prevent mixing
+ * locks of different types.
+ */
+typedef struct {
+	struct lock_impl internal_lock;
+} spinlock_t;
+
+#define SPIN_LOCK_UNLOCKED {.internal_lock = LOCK_IMPL_INITIALIZER}
+#define __SPIN_LOCK_UNLOCKED(x) SPIN_LOCK_UNLOCKED
+#define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED
+
+static inline void spin_lock_init(spinlock_t *lock)
+{
+	lock_impl_init(&lock->internal_lock);
+}
+
+static inline void spin_lock(spinlock_t *lock)
+{
+	/*
+	 * Spin locks also need to be removed in order to eliminate all
+	 * memory barriers. They are only used by the write side anyway.
+	 */
+#ifndef NO_SYNC_SMP_MB
+	preempt_disable();
+	lock_impl_lock(&lock->internal_lock);
+#endif
+}
+
+static inline void spin_unlock(spinlock_t *lock)
+{
+#ifndef NO_SYNC_SMP_MB
+	lock_impl_unlock(&lock->internal_lock);
+	preempt_enable();
+#endif
+}
+
+/* Don't bother with interrupts */
+#define spin_lock_irq(lock) spin_lock(lock)
+#define spin_unlock_irq(lock) spin_unlock(lock)
+#define spin_lock_irqsave(lock, flags) spin_lock(lock)
+#define spin_unlock_irqrestore(lock, flags) spin_unlock(lock)
+
+/*
+ * This is supposed to return an int, but I think that a bool should work as
+ * well.
+ */
+static inline bool spin_trylock(spinlock_t *lock)
+{
+#ifndef NO_SYNC_SMP_MB
+	preempt_disable();
+	return lock_impl_trylock(&lock->internal_lock);
+#else
+	return true;
+#endif
+}
+
+struct completion {
+	/* Hopefuly this won't overflow. */
+	unsigned int count;
+};
+
+#define COMPLETION_INITIALIZER(x) {.count = 0}
+#define DECLARE_COMPLETION(x) struct completion x = COMPLETION_INITIALIZER(x)
+#define DECLARE_COMPLETION_ONSTACK(x) DECLARE_COMPLETION(x)
+
+static inline void init_completion(struct completion *c)
+{
+	c->count = 0;
+}
+
+static inline void wait_for_completion(struct completion *c)
+{
+	unsigned int prev_count = __sync_fetch_and_sub(&c->count, 1);
+
+	assume(prev_count);
+}
+
+static inline void complete(struct completion *c)
+{
+	unsigned int prev_count = __sync_fetch_and_add(&c->count, 1);
+
+	BUG_ON(prev_count == UINT_MAX);
+}
+
+/* This function probably isn't very useful for CBMC. */
+static inline bool try_wait_for_completion(struct completion *c)
+{
+	BUG();
+}
+
+static inline bool completion_done(struct completion *c)
+{
+	return c->count;
+}
+
+/* TODO: Implement complete_all */
+static inline void complete_all(struct completion *c)
+{
+	BUG();
+}
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.c
new file mode 100644
index 000000000000..ca892e3b2351
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.c
@@ -0,0 +1,11 @@
+#include <config.h>
+
+#include "misc.h"
+#include "bug_on.h"
+
+struct rcu_head;
+
+void wakeme_after_rcu(struct rcu_head *head)
+{
+	BUG();
+}
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.h
new file mode 100644
index 000000000000..aca50030f954
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.h
@@ -0,0 +1,58 @@
+#ifndef MISC_H
+#define MISC_H
+
+#include "assume.h"
+#include "int_typedefs.h"
+#include "locks.h"
+
+#include <linux/types.h>
+
+/* Probably won't need to deal with bottom halves. */
+static inline void local_bh_disable(void) {}
+static inline void local_bh_enable(void) {}
+
+#define MODULE_ALIAS(X)
+#define module_param(...)
+#define EXPORT_SYMBOL_GPL(x)
+
+#define container_of(ptr, type, member) ({			\
+	const typeof(((type *)0)->member) *__mptr = (ptr);	\
+	(type *)((char *)__mptr - offsetof(type, member));	\
+})
+
+#ifndef USE_SIMPLE_SYNC_SRCU
+/* Abuse udelay to make sure that busy loops terminate. */
+#define udelay(x) assume(0)
+
+#else
+
+/* The simple custom synchronize_srcu is ok with try_check_zero failing. */
+#define udelay(x) do { } while (0)
+#endif
+
+#define trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
+	do { } while (0)
+
+#define notrace
+
+/* Avoid including rcupdate.h */
+struct rcu_synchronize {
+	struct rcu_head head;
+	struct completion completion;
+};
+
+void wakeme_after_rcu(struct rcu_head *head);
+
+#define rcu_lock_acquire(a) do { } while (0)
+#define rcu_lock_release(a) do { } while (0)
+#define rcu_lockdep_assert(c, s) do { } while (0)
+#define RCU_LOCKDEP_WARN(c, s) do { } while (0)
+
+/* Let CBMC non-deterministically choose switch between normal and expedited. */
+bool rcu_gp_is_normal(void);
+bool rcu_gp_is_expedited(void);
+
+/* Do the same for old versions of rcu. */
+#define rcu_expedited (rcu_gp_is_expedited())
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/percpu.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/percpu.h
new file mode 100644
index 000000000000..3de5a49de49b
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/percpu.h
@@ -0,0 +1,92 @@
+#ifndef PERCPU_H
+#define PERCPU_H
+
+#include <stddef.h>
+#include "bug_on.h"
+#include "preempt.h"
+
+#define __percpu
+
+/* Maximum size of any percpu data. */
+#define PERCPU_OFFSET (4 * sizeof(long))
+
+/* Ignore alignment, as CBMC doesn't care about false sharing. */
+#define alloc_percpu(type) __alloc_percpu(sizeof(type), 1)
+
+static inline void *__alloc_percpu(size_t size, size_t align)
+{
+	BUG();
+	return NULL;
+}
+
+static inline void free_percpu(void *ptr)
+{
+	BUG();
+}
+
+#define per_cpu_ptr(ptr, cpu) \
+	((typeof(ptr)) ((char *) (ptr) + PERCPU_OFFSET * cpu))
+
+#define __this_cpu_inc(pcp) __this_cpu_add(pcp, 1)
+#define __this_cpu_dec(pcp) __this_cpu_sub(pcp, 1)
+#define __this_cpu_sub(pcp, n) __this_cpu_add(pcp, -(typeof(pcp)) (n))
+
+#define this_cpu_inc(pcp) this_cpu_add(pcp, 1)
+#define this_cpu_dec(pcp) this_cpu_sub(pcp, 1)
+#define this_cpu_sub(pcp, n) this_cpu_add(pcp, -(typeof(pcp)) (n))
+
+/* Make CBMC use atomics to work around bug. */
+#ifdef RUN
+#define THIS_CPU_ADD_HELPER(ptr, x) (*(ptr) += (x))
+#else
+/*
+ * Split the atomic into a read and a write so that it has the least
+ * possible ordering.
+ */
+#define THIS_CPU_ADD_HELPER(ptr, x) \
+	do { \
+		typeof(ptr) this_cpu_add_helper_ptr = (ptr); \
+		typeof(ptr) this_cpu_add_helper_x = (x); \
+		typeof(*ptr) this_cpu_add_helper_temp; \
+		__CPROVER_atomic_begin(); \
+		this_cpu_add_helper_temp = *(this_cpu_add_helper_ptr); \
+		__CPROVER_atomic_end(); \
+		this_cpu_add_helper_temp += this_cpu_add_helper_x; \
+		__CPROVER_atomic_begin(); \
+		*(this_cpu_add_helper_ptr) = this_cpu_add_helper_temp; \
+		__CPROVER_atomic_end(); \
+	} while (0)
+#endif
+
+/*
+ * For some reason CBMC needs an atomic operation even though this is percpu
+ * data.
+ */
+#define __this_cpu_add(pcp, n) \
+	do { \
+		BUG_ON(preemptible()); \
+		THIS_CPU_ADD_HELPER(per_cpu_ptr(&(pcp), thread_cpu_id), \
+				    (typeof(pcp)) (n)); \
+	} while (0)
+
+#define this_cpu_add(pcp, n) \
+	do { \
+		int this_cpu_add_impl_cpu = get_cpu(); \
+		THIS_CPU_ADD_HELPER(per_cpu_ptr(&(pcp), this_cpu_add_impl_cpu), \
+				    (typeof(pcp)) (n)); \
+		put_cpu(); \
+	} while (0)
+
+/*
+ * This will cause a compiler warning because of the cast from char[][] to
+ * type*. This will cause a compile time error if type is too big.
+ */
+#define DEFINE_PER_CPU(type, name) \
+	char name[NR_CPUS][PERCPU_OFFSET]; \
+	typedef char percpu_too_big_##name \
+		[sizeof(type) > PERCPU_OFFSET ? -1 : 1]
+
+#define for_each_possible_cpu(cpu) \
+	for ((cpu) = 0; (cpu) < NR_CPUS; ++(cpu))
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.c
new file mode 100644
index 000000000000..4f1b068e9b7a
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.c
@@ -0,0 +1,78 @@
+#include <config.h>
+
+#include "preempt.h"
+
+#include "assume.h"
+#include "locks.h"
+
+/* Support NR_CPUS of at most 64 */
+#define CPU_PREEMPTION_LOCKS_INIT0 LOCK_IMPL_INITIALIZER
+#define CPU_PREEMPTION_LOCKS_INIT1 \
+	CPU_PREEMPTION_LOCKS_INIT0, CPU_PREEMPTION_LOCKS_INIT0
+#define CPU_PREEMPTION_LOCKS_INIT2 \
+	CPU_PREEMPTION_LOCKS_INIT1, CPU_PREEMPTION_LOCKS_INIT1
+#define CPU_PREEMPTION_LOCKS_INIT3 \
+	CPU_PREEMPTION_LOCKS_INIT2, CPU_PREEMPTION_LOCKS_INIT2
+#define CPU_PREEMPTION_LOCKS_INIT4 \
+	CPU_PREEMPTION_LOCKS_INIT3, CPU_PREEMPTION_LOCKS_INIT3
+#define CPU_PREEMPTION_LOCKS_INIT5 \
+	CPU_PREEMPTION_LOCKS_INIT4, CPU_PREEMPTION_LOCKS_INIT4
+
+/*
+ * Simulate disabling preemption by locking a particular cpu. NR_CPUS
+ * should be the actual number of cpus, not just the maximum.
+ */
+struct lock_impl cpu_preemption_locks[NR_CPUS] = {
+	CPU_PREEMPTION_LOCKS_INIT0
+#if (NR_CPUS - 1) & 1
+	, CPU_PREEMPTION_LOCKS_INIT0
+#endif
+#if (NR_CPUS - 1) & 2
+	, CPU_PREEMPTION_LOCKS_INIT1
+#endif
+#if (NR_CPUS - 1) & 4
+	, CPU_PREEMPTION_LOCKS_INIT2
+#endif
+#if (NR_CPUS - 1) & 8
+	, CPU_PREEMPTION_LOCKS_INIT3
+#endif
+#if (NR_CPUS - 1) & 16
+	, CPU_PREEMPTION_LOCKS_INIT4
+#endif
+#if (NR_CPUS - 1) & 32
+	, CPU_PREEMPTION_LOCKS_INIT5
+#endif
+};
+
+#undef CPU_PREEMPTION_LOCKS_INIT0
+#undef CPU_PREEMPTION_LOCKS_INIT1
+#undef CPU_PREEMPTION_LOCKS_INIT2
+#undef CPU_PREEMPTION_LOCKS_INIT3
+#undef CPU_PREEMPTION_LOCKS_INIT4
+#undef CPU_PREEMPTION_LOCKS_INIT5
+
+__thread int thread_cpu_id;
+__thread int preempt_disable_count;
+
+void preempt_disable(void)
+{
+	BUG_ON(preempt_disable_count < 0 || preempt_disable_count == INT_MAX);
+
+	if (preempt_disable_count++)
+		return;
+
+	thread_cpu_id = nondet_int();
+	assume(thread_cpu_id >= 0);
+	assume(thread_cpu_id < NR_CPUS);
+	lock_impl_lock(&cpu_preemption_locks[thread_cpu_id]);
+}
+
+void preempt_enable(void)
+{
+	BUG_ON(preempt_disable_count < 1);
+
+	if (--preempt_disable_count)
+		return;
+
+	lock_impl_unlock(&cpu_preemption_locks[thread_cpu_id]);
+}
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.h
new file mode 100644
index 000000000000..2f95ee0e4dd5
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.h
@@ -0,0 +1,58 @@
+#ifndef PREEMPT_H
+#define PREEMPT_H
+
+#include <stdbool.h>
+
+#include "bug_on.h"
+
+/* This flag contains garbage if preempt_disable_count is 0. */
+extern __thread int thread_cpu_id;
+
+/* Support recursive preemption disabling. */
+extern __thread int preempt_disable_count;
+
+void preempt_disable(void);
+void preempt_enable(void);
+
+static inline void preempt_disable_notrace(void)
+{
+	preempt_disable();
+}
+
+static inline void preempt_enable_no_resched(void)
+{
+	preempt_enable();
+}
+
+static inline void preempt_enable_notrace(void)
+{
+	preempt_enable();
+}
+
+static inline int preempt_count(void)
+{
+	return preempt_disable_count;
+}
+
+static inline bool preemptible(void)
+{
+	return !preempt_count();
+}
+
+static inline int get_cpu(void)
+{
+	preempt_disable();
+	return thread_cpu_id;
+}
+
+static inline void put_cpu(void)
+{
+	preempt_enable();
+}
+
+static inline void might_sleep(void)
+{
+	BUG_ON(preempt_disable_count);
+}
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/simple_sync_srcu.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/simple_sync_srcu.c
new file mode 100644
index 000000000000..ac9cbc62b411
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/simple_sync_srcu.c
@@ -0,0 +1,50 @@
+#include <config.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <pthread.h>
+#include <stddef.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "int_typedefs.h"
+
+#include "barriers.h"
+#include "bug_on.h"
+#include "locks.h"
+#include "misc.h"
+#include "preempt.h"
+#include "percpu.h"
+#include "workqueues.h"
+
+#include <linux/srcu.h>
+
+/* Functions needed from modify_srcu.c */
+bool try_check_zero(struct srcu_struct *sp, int idx, int trycount);
+void srcu_flip(struct srcu_struct *sp);
+
+/* Simpler implementation of synchronize_srcu that ignores batching. */
+void synchronize_srcu(struct srcu_struct *sp)
+{
+	int idx;
+	/*
+	 * This code assumes that try_check_zero will succeed anyway,
+	 * so there is no point in multiple tries.
+	 */
+	const int trycount = 1;
+
+	might_sleep();
+
+	/* Ignore the lock, as multiple writers aren't working yet anyway. */
+
+	idx = 1 ^ (sp->completed & 1);
+
+	/* For comments see srcu_advance_batches. */
+
+	assume(try_check_zero(sp, idx, trycount));
+
+	srcu_flip(sp);
+
+	assume(try_check_zero(sp, idx^1, trycount));
+}
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/workqueues.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/workqueues.h
new file mode 100644
index 000000000000..e58c8dfd3e90
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/workqueues.h
@@ -0,0 +1,102 @@
+#ifndef WORKQUEUES_H
+#define WORKQUEUES_H
+
+#include <stdbool.h>
+
+#include "barriers.h"
+#include "bug_on.h"
+#include "int_typedefs.h"
+
+#include <linux/types.h>
+
+/* Stub workqueue implementation. */
+
+struct work_struct;
+typedef void (*work_func_t)(struct work_struct *work);
+void delayed_work_timer_fn(unsigned long __data);
+
+struct work_struct {
+/*	atomic_long_t data; */
+	unsigned long data;
+
+	struct list_head entry;
+	work_func_t func;
+#ifdef CONFIG_LOCKDEP
+	struct lockdep_map lockdep_map;
+#endif
+};
+
+struct timer_list {
+	struct hlist_node	entry;
+	unsigned long		expires;
+	void			(*function)(unsigned long);
+	unsigned long		data;
+	u32			flags;
+	int			slack;
+};
+
+struct delayed_work {
+	struct work_struct work;
+	struct timer_list timer;
+
+	/* target workqueue and CPU ->timer uses to queue ->work */
+	struct workqueue_struct *wq;
+	int cpu;
+};
+
+
+static inline bool schedule_work(struct work_struct *work)
+{
+	BUG();
+	return true;
+}
+
+static inline bool schedule_work_on(int cpu, struct work_struct *work)
+{
+	BUG();
+	return true;
+}
+
+static inline bool queue_work(struct workqueue_struct *wq,
+			      struct work_struct *work)
+{
+	BUG();
+	return true;
+}
+
+static inline bool queue_delayed_work(struct workqueue_struct *wq,
+				      struct delayed_work *dwork,
+				      unsigned long delay)
+{
+	BUG();
+	return true;
+}
+
+#define INIT_WORK(w, f) \
+	do { \
+		(w)->data = 0; \
+		(w)->func = (f); \
+	} while (0)
+
+#define INIT_DELAYED_WORK(w, f) INIT_WORK(&(w)->work, (f))
+
+#define __WORK_INITIALIZER(n, f) { \
+		.data = 0, \
+		.entry = { &(n).entry, &(n).entry }, \
+		.func = f \
+	}
+
+/* Don't bother initializing timer. */
+#define __DELAYED_WORK_INITIALIZER(n, f, tflags) { \
+	.work = __WORK_INITIALIZER((n).work, (f)), \
+	}
+
+#define DECLARE_WORK(n, f) \
+	struct workqueue_struct n = __WORK_INITIALIZER
+
+#define DECLARE_DELAYED_WORK(n, f) \
+	struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f, 0)
+
+#define system_power_efficient_wq ((struct workqueue_struct *) NULL)
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore
new file mode 100644
index 000000000000..f47cb2045f13
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore
@@ -0,0 +1 @@
+*.out
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/Makefile b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/Makefile
new file mode 100644
index 000000000000..3a3aee149225
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/Makefile
@@ -0,0 +1,11 @@
+CBMC_FLAGS = -I../.. -I../../src -I../../include -I../../empty_includes -32 -pointer-check -mm pso
+
+all:
+	for i in ./*.pass; do \
+		echo $$i ; \
+		CBMC_FLAGS="$(CBMC_FLAGS)" sh ../test_script.sh --should-pass $$i > $$i.out 2>&1 ; \
+	done
+	for i in ./*.fail; do \
+		echo $$i ; \
+		CBMC_FLAGS="$(CBMC_FLAGS)" sh ../test_script.sh --should-fail $$i > $$i.out 2>&1 ; \
+	done
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/assert_end.fail b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/assert_end.fail
new file mode 100644
index 000000000000..40c8075919d1
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/assert_end.fail
@@ -0,0 +1 @@
+test_cbmc_options="-DASSERT_END"
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force.fail b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force.fail
new file mode 100644
index 000000000000..ada5baf0b60d
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force.fail
@@ -0,0 +1 @@
+test_cbmc_options="-DFORCE_FAILURE"
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force2.fail b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force2.fail
new file mode 100644
index 000000000000..8fe00c8db466
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force2.fail
@@ -0,0 +1 @@
+test_cbmc_options="-DFORCE_FAILURE_2"
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force3.fail b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force3.fail
new file mode 100644
index 000000000000..612ed6772844
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force3.fail
@@ -0,0 +1 @@
+test_cbmc_options="-DFORCE_FAILURE_3"
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/main.pass b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/main.pass
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/test.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/test.c
new file mode 100644
index 000000000000..470b1105a112
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/test.c
@@ -0,0 +1,72 @@
+#include <src/combined_source.c>
+
+int x;
+int y;
+
+int __unbuffered_tpr_x;
+int __unbuffered_tpr_y;
+
+DEFINE_SRCU(ss);
+
+void rcu_reader(void)
+{
+	int idx;
+
+#ifndef FORCE_FAILURE_3
+	idx = srcu_read_lock(&ss);
+#endif
+	might_sleep();
+
+	__unbuffered_tpr_y = READ_ONCE(y);
+#ifdef FORCE_FAILURE
+	srcu_read_unlock(&ss, idx);
+	idx = srcu_read_lock(&ss);
+#endif
+	WRITE_ONCE(x, 1);
+
+#ifndef FORCE_FAILURE_3
+	srcu_read_unlock(&ss, idx);
+#endif
+	might_sleep();
+}
+
+void *thread_update(void *arg)
+{
+	WRITE_ONCE(y, 1);
+#ifndef FORCE_FAILURE_2
+	synchronize_srcu(&ss);
+#endif
+	might_sleep();
+	__unbuffered_tpr_x = READ_ONCE(x);
+
+	return NULL;
+}
+
+void *thread_process_reader(void *arg)
+{
+	rcu_reader();
+
+	return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+	pthread_t tu;
+	pthread_t tpr;
+
+	if (pthread_create(&tu, NULL, thread_update, NULL))
+		abort();
+	if (pthread_create(&tpr, NULL, thread_process_reader, NULL))
+		abort();
+	if (pthread_join(tu, NULL))
+		abort();
+	if (pthread_join(tpr, NULL))
+		abort();
+	assert(__unbuffered_tpr_y != 0 || __unbuffered_tpr_x != 0);
+
+#ifdef ASSERT_END
+	assert(0);
+#endif
+
+	return 0;
+}
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/test_script.sh b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/test_script.sh
new file mode 100755
index 000000000000..d1545972a0fa
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/test_script.sh
@@ -0,0 +1,102 @@
+#!/bin/sh
+
+# This script expects a mode (either --should-pass or --should-fail) followed by
+# an input file. The script uses the following environment variables. The test C
+# source file is expected to be named test.c in the directory containing the
+# input file.
+#
+# CBMC: The command to run CBMC. Default: cbmc
+# CBMC_FLAGS: Additional flags to pass to CBMC
+# NR_CPUS: Number of cpus to run tests with. Default specified by the test
+# SYNC_SRCU_MODE: Choose implementation of synchronize_srcu. Defaults to simple.
+#                 kernel: Version included in the linux kernel source.
+#                 simple: Use try_check_zero directly.
+#
+# The input file is a script that is sourced by this file. It can define any of
+# the following variables to configure the test.
+#
+# test_cbmc_options: Extra options to pass to CBMC.
+# min_cpus_fail: Minimum number of CPUs (NR_CPUS) for verification to fail.
+#                The test is expected to pass if it is run with fewer. (Only
+#                useful for .fail files)
+# default_cpus: Quantity of CPUs to use for the test, if not specified on the
+#               command line. Default: Larger of 2 and MIN_CPUS_FAIL.
+
+set -e
+
+if test "$#" -ne 2; then
+	echo "Expected one option followed by an input file" 1>&2
+	exit 99
+fi
+
+if test "x$1" = "x--should-pass"; then
+	should_pass="yes"
+elif test "x$1" = "x--should-fail"; then
+	should_pass="no"
+else
+	echo "Unrecognized argument '$1'" 1>&2
+
+	# Exit code 99 indicates a hard error.
+	exit 99
+fi
+
+CBMC=${CBMC:-cbmc}
+
+SYNC_SRCU_MODE=${SYNC_SRCU_MODE:-simple}
+
+case ${SYNC_SRCU_MODE} in
+kernel) sync_srcu_mode_flags="" ;;
+simple) sync_srcu_mode_flags="-DUSE_SIMPLE_SYNC_SRCU" ;;
+
+*)
+	echo "Unrecognized argument '${SYNC_SRCU_MODE}'" 1>&2
+	exit 99
+	;;
+esac
+
+min_cpus_fail=1
+
+c_file=`dirname "$2"`/test.c
+
+# Source the input file.
+. $2
+
+if test ${min_cpus_fail} -gt 2; then
+	default_default_cpus=${min_cpus_fail}
+else
+	default_default_cpus=2
+fi
+default_cpus=${default_cpus:-${default_default_cpus}}
+cpus=${NR_CPUS:-${default_cpus}}
+
+# Check if there are two few cpus to make the test fail.
+if test $cpus -lt ${min_cpus_fail:-0}; then
+	should_pass="yes"
+fi
+
+cbmc_opts="-DNR_CPUS=${cpus} ${sync_srcu_mode_flags} ${test_cbmc_options} ${CBMC_FLAGS}"
+
+echo "Running CBMC: ${CBMC} ${cbmc_opts} ${c_file}"
+if ${CBMC} ${cbmc_opts} "${c_file}"; then
+	# Verification successful. Make sure that it was supposed to verify.
+	test "x${should_pass}" = xyes
+else
+	cbmc_exit_status=$?
+
+	# An exit status of 10 indicates a failed verification.
+	# (see cbmc_parse_optionst::do_bmc in the CBMC source code)
+	if test ${cbmc_exit_status} -eq 10 && test "x${should_pass}" = xno; then
+		:
+	else
+		echo "CBMC returned ${cbmc_exit_status} exit status" 1>&2
+
+		# Parse errors have exit status 6. Any other type of error
+		# should be considered a hard error.
+		if test ${cbmc_exit_status} -ne 6 && \
+		   test ${cbmc_exit_status} -ne 10; then
+			exit 99
+		else
+			exit 1
+		fi
+	fi
+fi
-- 
cgit v1.2.3


From 0901df3aeaee6cb5dde2fd51089786aff6225ebf Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 26 Jan 2017 00:42:49 +0100
Subject: bpf: use prefix_len in test_tag when reading fdinfo

We currently used len instead of prefix_len for the strncmp() in
fdinfo on the prog_tag. It still worked as we matched on the correct
output line also with first 8 instead of 10 chars, but lets fix it
properly to use the intended length.

Fixes: 62b64660262a ("bpf: add prog tag test case to bpf selftests")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/test_tag.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_tag.c b/tools/testing/selftests/bpf/test_tag.c
index 6ab4793b3d16..5f7c602f47d1 100644
--- a/tools/testing/selftests/bpf/test_tag.c
+++ b/tools/testing/selftests/bpf/test_tag.c
@@ -99,7 +99,7 @@ static void tag_from_fdinfo(int fd_prog, uint8_t *tag, uint32_t len)
 	assert(fp);
 
 	while (fgets(buff, sizeof(buff), fp)) {
-		if (strncmp(buff, "prog_tag:\t", len))
+		if (strncmp(buff, "prog_tag:\t", prefix_len))
 			continue;
 		ret = hex2bin(tag, buff + prefix_len, len);
 		break;
-- 
cgit v1.2.3


From c68a2aab3300df4106f368568bd7361d6f465993 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Fri, 16 Dec 2016 11:52:43 -0500
Subject: radix tree test suite: Remove duplicate bitops code

By adding __set_bit and __clear_bit to the tools include directory, we
can share the bitops code.  This reveals an include loop between kernel.h,
log2.h, bitmap.h and bitops.h.  Break it the same way as the kernel does;
by moving the kernel.h include from bitops.h to bitmap.h.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 tools/include/asm-generic/bitops/atomic.h          |   3 +
 tools/include/linux/bitmap.h                       |   1 +
 tools/include/linux/bitops.h                       |   1 -
 tools/testing/radix-tree/linux/bitops.h            | 160 ---------------------
 tools/testing/radix-tree/linux/bitops/__ffs.h      |  43 ------
 tools/testing/radix-tree/linux/bitops/ffs.h        |  41 ------
 tools/testing/radix-tree/linux/bitops/ffz.h        |  12 --
 tools/testing/radix-tree/linux/bitops/find.h       |  13 --
 tools/testing/radix-tree/linux/bitops/fls.h        |  41 ------
 tools/testing/radix-tree/linux/bitops/fls64.h      |  14 --
 tools/testing/radix-tree/linux/bitops/hweight.h    |  11 --
 tools/testing/radix-tree/linux/bitops/le.h         |  53 -------
 tools/testing/radix-tree/linux/bitops/non-atomic.h | 110 --------------
 tools/testing/radix-tree/linux/kernel.h            |   6 +-
 14 files changed, 8 insertions(+), 501 deletions(-)
 delete mode 100644 tools/testing/radix-tree/linux/bitops.h
 delete mode 100644 tools/testing/radix-tree/linux/bitops/__ffs.h
 delete mode 100644 tools/testing/radix-tree/linux/bitops/ffs.h
 delete mode 100644 tools/testing/radix-tree/linux/bitops/ffz.h
 delete mode 100644 tools/testing/radix-tree/linux/bitops/find.h
 delete mode 100644 tools/testing/radix-tree/linux/bitops/fls.h
 delete mode 100644 tools/testing/radix-tree/linux/bitops/fls64.h
 delete mode 100644 tools/testing/radix-tree/linux/bitops/hweight.h
 delete mode 100644 tools/testing/radix-tree/linux/bitops/le.h
 delete mode 100644 tools/testing/radix-tree/linux/bitops/non-atomic.h

(limited to 'tools/testing')

diff --git a/tools/include/asm-generic/bitops/atomic.h b/tools/include/asm-generic/bitops/atomic.h
index 18663f59d72f..68b8c1516c5a 100644
--- a/tools/include/asm-generic/bitops/atomic.h
+++ b/tools/include/asm-generic/bitops/atomic.h
@@ -20,4 +20,7 @@ static __always_inline int test_bit(unsigned int nr, const unsigned long *addr)
 		(((unsigned long *)addr)[nr / __BITS_PER_LONG])) != 0;
 }
 
+#define __set_bit(nr, addr)	set_bit(nr, addr)
+#define __clear_bit(nr, addr)	clear_bit(nr, addr)
+
 #endif /* _TOOLS_LINUX_ASM_GENERIC_BITOPS_ATOMIC_H_ */
diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h
index eef41d500e9e..e8b9f518e36b 100644
--- a/tools/include/linux/bitmap.h
+++ b/tools/include/linux/bitmap.h
@@ -4,6 +4,7 @@
 #include <string.h>
 #include <linux/bitops.h>
 #include <stdlib.h>
+#include <linux/kernel.h>
 
 #define DECLARE_BITMAP(name,bits) \
 	unsigned long name[BITS_TO_LONGS(bits)]
diff --git a/tools/include/linux/bitops.h b/tools/include/linux/bitops.h
index fc446343ff41..1aecad369af5 100644
--- a/tools/include/linux/bitops.h
+++ b/tools/include/linux/bitops.h
@@ -2,7 +2,6 @@
 #define _TOOLS_LINUX_BITOPS_H_
 
 #include <asm/types.h>
-#include <linux/kernel.h>
 #include <linux/compiler.h>
 
 #ifndef __WORDSIZE
diff --git a/tools/testing/radix-tree/linux/bitops.h b/tools/testing/radix-tree/linux/bitops.h
deleted file mode 100644
index a13e9bc76eec..000000000000
--- a/tools/testing/radix-tree/linux/bitops.h
+++ /dev/null
@@ -1,160 +0,0 @@
-#ifndef _ASM_GENERIC_BITOPS_NON_ATOMIC_H_
-#define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_
-
-#include <linux/types.h>
-#include <linux/bitops/find.h>
-#include <linux/bitops/hweight.h>
-#include <linux/kernel.h>
-
-#define BIT_MASK(nr)		(1UL << ((nr) % BITS_PER_LONG))
-#define BIT_WORD(nr)		((nr) / BITS_PER_LONG)
-#define BITS_PER_BYTE		8
-#define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
-
-/**
- * __set_bit - Set a bit in memory
- * @nr: the bit to set
- * @addr: the address to start counting from
- *
- * Unlike set_bit(), this function is non-atomic and may be reordered.
- * If it's called on the same region of memory simultaneously, the effect
- * may be that only one operation succeeds.
- */
-static inline void __set_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = BIT_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
-
-	*p  |= mask;
-}
-
-static inline void __clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = BIT_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
-
-	*p &= ~mask;
-}
-
-/**
- * __change_bit - Toggle a bit in memory
- * @nr: the bit to change
- * @addr: the address to start counting from
- *
- * Unlike change_bit(), this function is non-atomic and may be reordered.
- * If it's called on the same region of memory simultaneously, the effect
- * may be that only one operation succeeds.
- */
-static inline void __change_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = BIT_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
-
-	*p ^= mask;
-}
-
-/**
- * __test_and_set_bit - Set a bit and return its old value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is non-atomic and can be reordered.
- * If two examples of this operation race, one can appear to succeed
- * but actually fail.  You must protect multiple accesses with a lock.
- */
-static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = BIT_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
-	unsigned long old = *p;
-
-	*p = old | mask;
-	return (old & mask) != 0;
-}
-
-/**
- * __test_and_clear_bit - Clear a bit and return its old value
- * @nr: Bit to clear
- * @addr: Address to count from
- *
- * This operation is non-atomic and can be reordered.
- * If two examples of this operation race, one can appear to succeed
- * but actually fail.  You must protect multiple accesses with a lock.
- */
-static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = BIT_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
-	unsigned long old = *p;
-
-	*p = old & ~mask;
-	return (old & mask) != 0;
-}
-
-/* WARNING: non atomic and it can be reordered! */
-static inline int __test_and_change_bit(int nr,
-					    volatile unsigned long *addr)
-{
-	unsigned long mask = BIT_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
-	unsigned long old = *p;
-
-	*p = old ^ mask;
-	return (old & mask) != 0;
-}
-
-/**
- * test_bit - Determine whether a bit is set
- * @nr: bit number to test
- * @addr: Address to start counting from
- */
-static inline int test_bit(int nr, const volatile unsigned long *addr)
-{
-	return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
-}
-
-/**
- * __ffs - find first bit in word.
- * @word: The word to search
- *
- * Undefined if no bit exists, so code should check against 0 first.
- */
-static inline unsigned long __ffs(unsigned long word)
-{
-	int num = 0;
-
-	if ((word & 0xffffffff) == 0) {
-		num += 32;
-		word >>= 32;
-	}
-	if ((word & 0xffff) == 0) {
-		num += 16;
-		word >>= 16;
-	}
-	if ((word & 0xff) == 0) {
-		num += 8;
-		word >>= 8;
-	}
-	if ((word & 0xf) == 0) {
-		num += 4;
-		word >>= 4;
-	}
-	if ((word & 0x3) == 0) {
-		num += 2;
-		word >>= 2;
-	}
-	if ((word & 0x1) == 0)
-		num += 1;
-	return num;
-}
-
-unsigned long find_next_bit(const unsigned long *addr,
-			    unsigned long size,
-			    unsigned long offset);
-
-static inline unsigned long hweight_long(unsigned long w)
-{
-	return sizeof(w) == 4 ? hweight32(w) : hweight64(w);
-}
-
-#endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */
diff --git a/tools/testing/radix-tree/linux/bitops/__ffs.h b/tools/testing/radix-tree/linux/bitops/__ffs.h
deleted file mode 100644
index 9a3274aecf83..000000000000
--- a/tools/testing/radix-tree/linux/bitops/__ffs.h
+++ /dev/null
@@ -1,43 +0,0 @@
-#ifndef _ASM_GENERIC_BITOPS___FFS_H_
-#define _ASM_GENERIC_BITOPS___FFS_H_
-
-#include <asm/types.h>
-
-/**
- * __ffs - find first bit in word.
- * @word: The word to search
- *
- * Undefined if no bit exists, so code should check against 0 first.
- */
-static inline unsigned long __ffs(unsigned long word)
-{
-	int num = 0;
-
-#if BITS_PER_LONG == 64
-	if ((word & 0xffffffff) == 0) {
-		num += 32;
-		word >>= 32;
-	}
-#endif
-	if ((word & 0xffff) == 0) {
-		num += 16;
-		word >>= 16;
-	}
-	if ((word & 0xff) == 0) {
-		num += 8;
-		word >>= 8;
-	}
-	if ((word & 0xf) == 0) {
-		num += 4;
-		word >>= 4;
-	}
-	if ((word & 0x3) == 0) {
-		num += 2;
-		word >>= 2;
-	}
-	if ((word & 0x1) == 0)
-		num += 1;
-	return num;
-}
-
-#endif /* _ASM_GENERIC_BITOPS___FFS_H_ */
diff --git a/tools/testing/radix-tree/linux/bitops/ffs.h b/tools/testing/radix-tree/linux/bitops/ffs.h
deleted file mode 100644
index fbbb43af7dc0..000000000000
--- a/tools/testing/radix-tree/linux/bitops/ffs.h
+++ /dev/null
@@ -1,41 +0,0 @@
-#ifndef _ASM_GENERIC_BITOPS_FFS_H_
-#define _ASM_GENERIC_BITOPS_FFS_H_
-
-/**
- * ffs - find first bit set
- * @x: the word to search
- *
- * This is defined the same way as
- * the libc and compiler builtin ffs routines, therefore
- * differs in spirit from the above ffz (man ffs).
- */
-static inline int ffs(int x)
-{
-	int r = 1;
-
-	if (!x)
-		return 0;
-	if (!(x & 0xffff)) {
-		x >>= 16;
-		r += 16;
-	}
-	if (!(x & 0xff)) {
-		x >>= 8;
-		r += 8;
-	}
-	if (!(x & 0xf)) {
-		x >>= 4;
-		r += 4;
-	}
-	if (!(x & 3)) {
-		x >>= 2;
-		r += 2;
-	}
-	if (!(x & 1)) {
-		x >>= 1;
-		r += 1;
-	}
-	return r;
-}
-
-#endif /* _ASM_GENERIC_BITOPS_FFS_H_ */
diff --git a/tools/testing/radix-tree/linux/bitops/ffz.h b/tools/testing/radix-tree/linux/bitops/ffz.h
deleted file mode 100644
index 6744bd4cdf46..000000000000
--- a/tools/testing/radix-tree/linux/bitops/ffz.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef _ASM_GENERIC_BITOPS_FFZ_H_
-#define _ASM_GENERIC_BITOPS_FFZ_H_
-
-/*
- * ffz - find first zero in word.
- * @word: The word to search
- *
- * Undefined if no zero exists, so code should check against ~0UL first.
- */
-#define ffz(x)  __ffs(~(x))
-
-#endif /* _ASM_GENERIC_BITOPS_FFZ_H_ */
diff --git a/tools/testing/radix-tree/linux/bitops/find.h b/tools/testing/radix-tree/linux/bitops/find.h
deleted file mode 100644
index 72a51e5a12ef..000000000000
--- a/tools/testing/radix-tree/linux/bitops/find.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef _ASM_GENERIC_BITOPS_FIND_H_
-#define _ASM_GENERIC_BITOPS_FIND_H_
-
-extern unsigned long find_next_bit(const unsigned long *addr, unsigned long
-		size, unsigned long offset);
-
-extern unsigned long find_next_zero_bit(const unsigned long *addr, unsigned
-		long size, unsigned long offset);
-
-#define find_first_bit(addr, size) find_next_bit((addr), (size), 0)
-#define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0)
-
-#endif /*_ASM_GENERIC_BITOPS_FIND_H_ */
diff --git a/tools/testing/radix-tree/linux/bitops/fls.h b/tools/testing/radix-tree/linux/bitops/fls.h
deleted file mode 100644
index 850859bc5069..000000000000
--- a/tools/testing/radix-tree/linux/bitops/fls.h
+++ /dev/null
@@ -1,41 +0,0 @@
-#ifndef _ASM_GENERIC_BITOPS_FLS_H_
-#define _ASM_GENERIC_BITOPS_FLS_H_
-
-/**
- * fls - find last (most-significant) bit set
- * @x: the word to search
- *
- * This is defined the same way as ffs.
- * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
- */
-
-static inline int fls(int x)
-{
-	int r = 32;
-
-	if (!x)
-		return 0;
-	if (!(x & 0xffff0000u)) {
-		x <<= 16;
-		r -= 16;
-	}
-	if (!(x & 0xff000000u)) {
-		x <<= 8;
-		r -= 8;
-	}
-	if (!(x & 0xf0000000u)) {
-		x <<= 4;
-		r -= 4;
-	}
-	if (!(x & 0xc0000000u)) {
-		x <<= 2;
-		r -= 2;
-	}
-	if (!(x & 0x80000000u)) {
-		x <<= 1;
-		r -= 1;
-	}
-	return r;
-}
-
-#endif /* _ASM_GENERIC_BITOPS_FLS_H_ */
diff --git a/tools/testing/radix-tree/linux/bitops/fls64.h b/tools/testing/radix-tree/linux/bitops/fls64.h
deleted file mode 100644
index 1b6b17ce2428..000000000000
--- a/tools/testing/radix-tree/linux/bitops/fls64.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef _ASM_GENERIC_BITOPS_FLS64_H_
-#define _ASM_GENERIC_BITOPS_FLS64_H_
-
-#include <asm/types.h>
-
-static inline int fls64(__u64 x)
-{
-	__u32 h = x >> 32;
-	if (h)
-		return fls(h) + 32;
-	return fls(x);
-}
-
-#endif /* _ASM_GENERIC_BITOPS_FLS64_H_ */
diff --git a/tools/testing/radix-tree/linux/bitops/hweight.h b/tools/testing/radix-tree/linux/bitops/hweight.h
deleted file mode 100644
index fbbc383771da..000000000000
--- a/tools/testing/radix-tree/linux/bitops/hweight.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef _ASM_GENERIC_BITOPS_HWEIGHT_H_
-#define _ASM_GENERIC_BITOPS_HWEIGHT_H_
-
-#include <asm/types.h>
-
-extern unsigned int hweight32(unsigned int w);
-extern unsigned int hweight16(unsigned int w);
-extern unsigned int hweight8(unsigned int w);
-extern unsigned long hweight64(__u64 w);
-
-#endif /* _ASM_GENERIC_BITOPS_HWEIGHT_H_ */
diff --git a/tools/testing/radix-tree/linux/bitops/le.h b/tools/testing/radix-tree/linux/bitops/le.h
deleted file mode 100644
index b9c7e5d2d2ad..000000000000
--- a/tools/testing/radix-tree/linux/bitops/le.h
+++ /dev/null
@@ -1,53 +0,0 @@
-#ifndef _ASM_GENERIC_BITOPS_LE_H_
-#define _ASM_GENERIC_BITOPS_LE_H_
-
-#include <asm/types.h>
-#include <asm/byteorder.h>
-
-#define BITOP_WORD(nr)		((nr) / BITS_PER_LONG)
-#define BITOP_LE_SWIZZLE	((BITS_PER_LONG-1) & ~0x7)
-
-#if defined(__LITTLE_ENDIAN)
-
-#define generic_test_le_bit(nr, addr) test_bit(nr, addr)
-#define generic___set_le_bit(nr, addr) __set_bit(nr, addr)
-#define generic___clear_le_bit(nr, addr) __clear_bit(nr, addr)
-
-#define generic_test_and_set_le_bit(nr, addr) test_and_set_bit(nr, addr)
-#define generic_test_and_clear_le_bit(nr, addr) test_and_clear_bit(nr, addr)
-
-#define generic___test_and_set_le_bit(nr, addr) __test_and_set_bit(nr, addr)
-#define generic___test_and_clear_le_bit(nr, addr) __test_and_clear_bit(nr, addr)
-
-#define generic_find_next_zero_le_bit(addr, size, offset) find_next_zero_bit(addr, size, offset)
-
-#elif defined(__BIG_ENDIAN)
-
-#define generic_test_le_bit(nr, addr) \
-	test_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))
-#define generic___set_le_bit(nr, addr) \
-	__set_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))
-#define generic___clear_le_bit(nr, addr) \
-	__clear_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))
-
-#define generic_test_and_set_le_bit(nr, addr) \
-	test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))
-#define generic_test_and_clear_le_bit(nr, addr) \
-	test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))
-
-#define generic___test_and_set_le_bit(nr, addr) \
-	__test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))
-#define generic___test_and_clear_le_bit(nr, addr) \
-	__test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))
-
-extern unsigned long generic_find_next_zero_le_bit(const unsigned long *addr,
-		unsigned long size, unsigned long offset);
-
-#else
-#error "Please fix <asm/byteorder.h>"
-#endif
-
-#define generic_find_first_zero_le_bit(addr, size) \
-        generic_find_next_zero_le_bit((addr), (size), 0)
-
-#endif /* _ASM_GENERIC_BITOPS_LE_H_ */
diff --git a/tools/testing/radix-tree/linux/bitops/non-atomic.h b/tools/testing/radix-tree/linux/bitops/non-atomic.h
deleted file mode 100644
index 6a1bcb9d2c4a..000000000000
--- a/tools/testing/radix-tree/linux/bitops/non-atomic.h
+++ /dev/null
@@ -1,110 +0,0 @@
-#ifndef _ASM_GENERIC_BITOPS_NON_ATOMIC_H_
-#define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_
-
-#include <asm/types.h>
-
-#define BITOP_WORD(nr)		((nr) / BITS_PER_LONG)
-
-/**
- * __set_bit - Set a bit in memory
- * @nr: the bit to set
- * @addr: the address to start counting from
- *
- * Unlike set_bit(), this function is non-atomic and may be reordered.
- * If it's called on the same region of memory simultaneously, the effect
- * may be that only one operation succeeds.
- */
-static inline void __set_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = BIT_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
-
-	*p  |= mask;
-}
-
-static inline void __clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = BIT_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
-
-	*p &= ~mask;
-}
-
-/**
- * __change_bit - Toggle a bit in memory
- * @nr: the bit to change
- * @addr: the address to start counting from
- *
- * Unlike change_bit(), this function is non-atomic and may be reordered.
- * If it's called on the same region of memory simultaneously, the effect
- * may be that only one operation succeeds.
- */
-static inline void __change_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = BIT_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
-
-	*p ^= mask;
-}
-
-/**
- * __test_and_set_bit - Set a bit and return its old value
- * @nr: Bit to set
- * @addr: Address to count from
- *
- * This operation is non-atomic and can be reordered.
- * If two examples of this operation race, one can appear to succeed
- * but actually fail.  You must protect multiple accesses with a lock.
- */
-static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = BIT_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
-	unsigned long old = *p;
-
-	*p = old | mask;
-	return (old & mask) != 0;
-}
-
-/**
- * __test_and_clear_bit - Clear a bit and return its old value
- * @nr: Bit to clear
- * @addr: Address to count from
- *
- * This operation is non-atomic and can be reordered.
- * If two examples of this operation race, one can appear to succeed
- * but actually fail.  You must protect multiple accesses with a lock.
- */
-static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
-{
-	unsigned long mask = BIT_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
-	unsigned long old = *p;
-
-	*p = old & ~mask;
-	return (old & mask) != 0;
-}
-
-/* WARNING: non atomic and it can be reordered! */
-static inline int __test_and_change_bit(int nr,
-					    volatile unsigned long *addr)
-{
-	unsigned long mask = BIT_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
-	unsigned long old = *p;
-
-	*p = old ^ mask;
-	return (old & mask) != 0;
-}
-
-/**
- * test_bit - Determine whether a bit is set
- * @nr: bit number to test
- * @addr: Address to start counting from
- */
-static inline int test_bit(int nr, const volatile unsigned long *addr)
-{
-	return 1UL & (addr[BITOP_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
-}
-
-#endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */
diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h
index 9b43b4975d83..1cd72a84a089 100644
--- a/tools/testing/radix-tree/linux/kernel.h
+++ b/tools/testing/radix-tree/linux/kernel.h
@@ -7,8 +7,10 @@
 #include <stddef.h>
 #include <limits.h>
 
-#include "../../include/linux/compiler.h"
-#include "../../include/linux/err.h"
+#include <linux/compiler.h>
+#include <linux/err.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
 #include "../../../include/linux/kconfig.h"
 
 #ifdef BENCHMARK
-- 
cgit v1.2.3


From b246a9d2671f4f6cfab3f98199568e0b3028f6e5 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Mon, 19 Dec 2016 11:07:17 -0500
Subject: tools: Provide a definition of WARN_ON

The definition of WARN_ON being used by the radix tree test suite was
deficient in two ways: it did not provide a return value, and it stopped
execution instead of continuing.  This version of WARN_ON tells you
which file & line the assertion was triggered in.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 tools/include/asm/bug.h                 | 8 ++++++++
 tools/testing/radix-tree/linux/kernel.h | 1 -
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/include/asm/bug.h b/tools/include/asm/bug.h
index beda1a884b50..4790f047a89c 100644
--- a/tools/include/asm/bug.h
+++ b/tools/include/asm/bug.h
@@ -12,6 +12,14 @@
 	unlikely(__ret_warn_on);		\
 })
 
+#define WARN_ON(condition) ({					\
+	int __ret_warn_on = !!(condition);			\
+	if (unlikely(__ret_warn_on))				\
+		__WARN_printf("assertion failed at %s:%d\n",	\
+				__FILE__, __LINE__);		\
+	unlikely(__ret_warn_on);				\
+})
+
 #define WARN_ON_ONCE(condition) ({			\
 	static int __warned;				\
 	int __ret_warn_once = !!(condition);		\
diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h
index 1cd72a84a089..9681463c91e2 100644
--- a/tools/testing/radix-tree/linux/kernel.h
+++ b/tools/testing/radix-tree/linux/kernel.h
@@ -24,7 +24,6 @@
 #endif
 
 #define BUG_ON(expr)	assert(!(expr))
-#define WARN_ON(expr)	assert(!(expr))
 #define __init
 #define __must_check
 #define panic(expr)
-- 
cgit v1.2.3


From e64d5fbe56259c94df504af8ce804cfc6a022adb Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave.hansen@linux.intel.com>
Date: Wed, 1 Feb 2017 14:56:29 -0800
Subject: x86/mpx: Re-add MPX to selftests Makefile

Ingo pointed out that the MPX tests were no longer in the selftests
Makefile.  It appears that I shot myself in the foot on this one
and accidentally removed them when I added the pkeys tests, probably
from bungling a merge conflict.

Reported-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 5f23f6d082a9 ("x86/pkeys: Add self-tests")
Link: http://lkml.kernel.org/r/20170201225629.C3070852@viggo.jf.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/testing/selftests/x86/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 8c1cb423cfe6..fefd95043fd7 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -5,7 +5,7 @@ include ../lib.mk
 .PHONY: all all_32 all_64 warn_32bit_failure clean
 
 TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \
-			check_initial_reg_state sigreturn ldt_gdt iopl \
+			check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test \
 			protection_keys test_vdso
 TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
 			test_FCMOV test_FCOMI test_FISTTP \
-- 
cgit v1.2.3


From 29200c199cc9bde59033ab30fcc40b6c8ae630b0 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Fri, 3 Feb 2017 16:25:23 -0500
Subject: bpf: test for AND edge cases

These two tests are based on the work done for f23cc643f9ba.  The first test is
just a basic one to make sure we don't allow AND'ing negative values, even if it
would result in a valid index for the array.  The second is a cleaned up version
of the original testcase provided by Jann Horn that resulted in the commit.

Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/test_verifier.c | 55 +++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 0d0912c7f03c..df194e1d56c2 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -4370,6 +4370,61 @@ static struct bpf_test tests[] = {
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	},
+	{
+		"invalid and of negative number",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_IMM(BPF_REG_1, 6),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_1, -4),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+				   offsetof(struct test_val, foo)),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
+		.result = REJECT,
+		.result_unpriv = REJECT,
+	},
+	{
+		"invalid range check",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 12),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_9, 1),
+			BPF_ALU32_IMM(BPF_MOD, BPF_REG_1, 2),
+			BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 1),
+			BPF_ALU32_REG(BPF_AND, BPF_REG_9, BPF_REG_1),
+			BPF_ALU32_IMM(BPF_ADD, BPF_REG_9, 1),
+			BPF_ALU32_IMM(BPF_RSH, BPF_REG_9, 1),
+			BPF_MOV32_IMM(BPF_REG_3, 1),
+			BPF_ALU32_REG(BPF_SUB, BPF_REG_3, BPF_REG_9),
+			BPF_ALU32_IMM(BPF_MUL, BPF_REG_3, 0x10000000),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3),
+			BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0),
+			BPF_MOV64_REG(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
+		.result = REJECT,
+		.result_unpriv = REJECT,
+	}
 };
 
 static int probe_filter_length(const struct bpf_insn *fp)
-- 
cgit v1.2.3


From 63dfef75ed75364901d7caa52c6420cec3e73519 Mon Sep 17 00:00:00 2001
From: William Tu <u9012063@gmail.com>
Date: Sat, 4 Feb 2017 08:37:29 -0800
Subject: bpf: enable verifier to add 0 to packet ptr

The patch fixes the case when adding a zero value to the packet
pointer.  The zero value could come from src_reg equals type
BPF_K or CONST_IMM.  The patch fixes both, otherwise the verifer
reports the following error:
  [...]
    R0=imm0,min_value=0,max_value=0
    R1=pkt(id=0,off=0,r=4)
    R2=pkt_end R3=fp-12
    R4=imm4,min_value=4,max_value=4
    R5=pkt(id=0,off=4,r=4)
  269: (bf) r2 = r0     // r2 becomes imm0
  270: (77) r2 >>= 3
  271: (bf) r4 = r1     // r4 becomes pkt ptr
  272: (0f) r4 += r2    // r4 += 0
  addition of negative constant to packet pointer is not allowed

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: Mihai Budiu <mbudiu@vmware.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/verifier.c                       |  2 +-
 tools/testing/selftests/bpf/test_verifier.c | 23 +++++++++++++++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index fb3513b35c0b..1a754e5d2695 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1397,7 +1397,7 @@ static int check_packet_ptr_add(struct bpf_verifier_env *env,
 		imm = insn->imm;
 
 add_imm:
-		if (imm <= 0) {
+		if (imm < 0) {
 			verbose("addition of negative constant to packet pointer is not allowed\n");
 			return -EACCES;
 		}
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index df194e1d56c2..71f6407cde60 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -2403,6 +2403,29 @@ static struct bpf_test tests[] = {
 		.result = ACCEPT,
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	},
+	{
+		"direct packet access: test14 (pkt_ptr += 0, CONST_IMM, good access)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 7),
+			BPF_MOV64_IMM(BPF_REG_5, 12),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_5, 4),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_6, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
 	{
 		"helper access to packet: test1, valid packet_ptr range",
 		.insns = {
-- 
cgit v1.2.3


From 2e07c9f55ecabdf78746b9808fec6ab549a07f8a Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Tue, 7 Feb 2017 11:49:21 -0500
Subject: ktest: Have POST_TEST run after the test has totally completed

The POST_TEST config is to be executed after a test has fully compeleted,
whether the test passed or failed. It currently is executed at the moment
that the test has been decided if it failed or not. As the test does other
clean ups, it isn't truly finished. Move the POST_TEST execution to after
all the test cleanups have been done.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tools/testing/ktest/ktest.pl | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index d08e214ec6e7..caa90d391a96 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -1412,6 +1412,10 @@ sub dodie {
 	    system("stty $stty_orig");
     }
 
+    if (defined($post_test)) {
+	run_command $post_test;
+    }
+
     die @_, "\n";
 }
 
@@ -1624,10 +1628,6 @@ sub save_logs {
 
 sub fail {
 
-	if (defined($post_test)) {
-		run_command $post_test;
-	}
-
 	if ($die_on_failure) {
 		dodie @_;
 	}
@@ -1660,6 +1660,10 @@ sub fail {
 	    save_logs "fail", $store_failures;
         }
 
+	if (defined($post_test)) {
+		run_command $post_test;
+	}
+
 	return 1;
 }
 
@@ -2489,10 +2493,6 @@ sub halt {
 sub success {
     my ($i) = @_;
 
-    if (defined($post_test)) {
-	run_command $post_test;
-    }
-
     $successes++;
 
     my $name = "";
@@ -2517,6 +2517,10 @@ sub success {
 	doprint "Reboot and wait $sleep_time seconds\n";
 	reboot_to_good $sleep_time;
     }
+
+    if (defined($post_test)) {
+	run_command $post_test;
+    }
 }
 
 sub answer_bisect {
-- 
cgit v1.2.3


From 32677207dcc5e594254b7fb4fb2352b1755b1d5b Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Tue, 7 Feb 2017 12:05:25 -0500
Subject: ktest: Fix child exit code processing

The child_exit errno needs to be shifted by 8 bits to compare against the
return values for the bisect variables.

Fixes: c5dacb88f0a64 ("ktest: Allow overriding bisect test results")
Cc: stable@vger.kernel.org
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tools/testing/ktest/ktest.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index caa90d391a96..a64da242b824 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -2633,7 +2633,7 @@ sub do_run_test {
     }
 
     waitpid $child_pid, 0;
-    $child_exit = $?;
+    $child_exit = $? >> 8;
 
     my $end_time = time;
     $test_time = $end_time - $start_time;
-- 
cgit v1.2.3


From 6e98d1b4415fe681ceb245e1374ed5e1942a332b Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Tue, 7 Feb 2017 12:22:03 -0500
Subject: ktest: Add timeout to ssh command

Add a timeout to performing an ssh command. This will let testing if a
machine is alive or not, or if something else may be amiss. A timeout can be
passed to ssh, where ssh will fail if it does not complete within the given
timeout.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tools/testing/ktest/ktest.pl | 35 +++++++++++++++++++++++++++--------
 1 file changed, 27 insertions(+), 8 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index a64da242b824..d9bdd3d6dba6 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -1668,19 +1668,18 @@ sub fail {
 }
 
 sub run_command {
-    my ($command, $redirect) = @_;
+    my ($command, $redirect, $timeout) = @_;
     my $start_time;
     my $end_time;
     my $dolog = 0;
     my $dord = 0;
     my $pid;
 
-    $start_time = time;
-
     $command =~ s/\$SSH_USER/$ssh_user/g;
     $command =~ s/\$MACHINE/$machine/g;
 
     doprint("$command ... ");
+    $start_time = time;
 
     $pid = open(CMD, "$command 2>&1 |") or
 	(fail "unable to exec $command" and return 0);
@@ -1697,14 +1696,34 @@ sub run_command {
 	$dord = 1;
     }
 
-    while (<CMD>) {
-	print LOG if ($dolog);
-	print RD  if ($dord);
+    my $hit_timeout = 0;
+
+    while (1) {
+	my $fp = \*CMD;
+	if (defined($timeout)) {
+	    doprint "timeout = $timeout\n";
+	}
+	my $line = wait_for_input($fp, $timeout);
+	if (!defined($line)) {
+	    my $now = time;
+	    if (defined($timeout) && (($now - $start_time) >= $timeout)) {
+		doprint "Hit timeout of $timeout, killing process\n";
+		$hit_timeout = 1;
+		kill 9, $pid;
+	    }
+	    last;
+	}
+	print LOG $line if ($dolog);
+	print RD $line if ($dord);
     }
 
     waitpid($pid, 0);
     my $failed = $?;
 
+    if ($hit_timeout) {
+	$failed = 1;
+    }
+
     close(CMD);
     close(LOG) if ($dolog);
     close(RD)  if ($dord);
@@ -1728,11 +1747,11 @@ sub run_command {
 }
 
 sub run_ssh {
-    my ($cmd) = @_;
+    my ($cmd, $timeout) = @_;
     my $cp_exec = $ssh_exec;
 
     $cp_exec =~ s/\$SSH_COMMAND/$cmd/g;
-    return run_command "$cp_exec";
+    return run_command "$cp_exec", undef , $timeout;
 }
 
 sub run_scp {
-- 
cgit v1.2.3


From 6474ace999edd714151c04c0b0534b89c2e9b6d8 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Tue, 7 Feb 2017 12:13:08 -0500
Subject: ktest.pl: Powercycle the box on reboot if no connection can be made

When performing a reboot of the test box, try to ssh to it. If it can't
connect for 5 seconds, then powercycle the box. This is useful because the
reboot is done via ssh, and if you can't ssh to the box because it is hung,
the reboot fails to reboot.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tools/testing/ktest/ktest.pl | 40 +++++++++++++++++++++++++++++-----------
 1 file changed, 29 insertions(+), 11 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index d9bdd3d6dba6..6a1484cc4436 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -1325,26 +1325,44 @@ sub wait_for_monitor;
 
 sub reboot {
     my ($time) = @_;
+    my $powercycle = 0;
 
-    # Make sure everything has been written to disk
-    run_ssh("sync");
+    # test if the machine can be connected to within 5 seconds
+    my $stat = run_ssh("echo check machine status", 5);
+    if (!$stat) {
+	doprint("power cycle\n");
+	$powercycle = 1;
+    }
+
+    if ($powercycle) {
+	run_command "$power_cycle";
 
-    if (defined($time)) {
 	start_monitor;
 	# flush out current monitor
 	# May contain the reboot success line
 	wait_for_monitor 1;
-    }
 
-    # try to reboot normally
-    if (run_command $reboot) {
-	if (defined($powercycle_after_reboot)) {
-	    sleep $powercycle_after_reboot;
+    } else {
+	# Make sure everything has been written to disk
+	run_ssh("sync");
+
+	if (defined($time)) {
+	    start_monitor;
+	    # flush out current monitor
+	    # May contain the reboot success line
+	    wait_for_monitor 1;
+	}
+
+	# try to reboot normally
+	if (run_command $reboot) {
+	    if (defined($powercycle_after_reboot)) {
+		sleep $powercycle_after_reboot;
+		run_command "$power_cycle";
+	    }
+	} else {
+	    # nope? power cycle it.
 	    run_command "$power_cycle";
 	}
-    } else {
-	# nope? power cycle it.
-	run_command "$power_cycle";
     }
 
     if (defined($time)) {
-- 
cgit v1.2.3


From 5739438b725c09938bb6af9e4477c4283b8326cf Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Tue, 7 Feb 2017 14:50:03 -0500
Subject: ktest: Add variable run_command_status to save status of commands
 executed

Create a variable called run_command_status that saves the status of the
executed commands and can be used by other functions later to test for
status.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tools/testing/ktest/ktest.pl | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index 6a1484cc4436..29470b554711 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -179,6 +179,7 @@ my $localversion;
 my $iteration = 0;
 my $successes = 0;
 my $stty_orig;
+my $run_command_status = 0;
 
 my $bisect_good;
 my $bisect_bad;
@@ -1736,11 +1737,8 @@ sub run_command {
     }
 
     waitpid($pid, 0);
-    my $failed = $?;
-
-    if ($hit_timeout) {
-	$failed = 1;
-    }
+    # shift 8 for real exit status
+    $run_command_status = $? >> 8;
 
     close(CMD);
     close(LOG) if ($dolog);
@@ -1755,13 +1753,17 @@ sub run_command {
 	doprint "[$delta seconds] ";
     }
 
-    if ($failed) {
+    if ($hit_timeout) {
+	$run_command_status = 1;
+    }
+
+    if ($run_command_status) {
 	doprint "FAILED!\n";
     } else {
 	doprint "SUCCESS\n";
     }
 
-    return !$failed;
+    return !$run_command_status;
 }
 
 sub run_ssh {
@@ -2578,16 +2580,15 @@ sub answer_bisect {
 }
 
 sub child_run_test {
-    my $failed = 0;
 
     # child should have no power
     $reboot_on_error = 0;
     $poweroff_on_error = 0;
     $die_on_failure = 1;
 
-    run_command $run_test, $testlog or $failed = 1;
+    run_command $run_test, $testlog;
 
-    exit $failed;
+    exit $run_command_status;
 }
 
 my $child_done;
@@ -3371,7 +3372,6 @@ sub config_bisect {
     save_config \%good_configs, $good_config;
     save_config \%bad_configs, $bad_config;
 
-
     if (defined($config_bisect_check) && $config_bisect_check ne "0") {
 	if ($config_bisect_check ne "good") {
 	    doprint "Testing bad config\n";
-- 
cgit v1.2.3


From 16846c2d96b0d7b3f7123257c4b58b06768c6d65 Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave.hansen@linux.intel.com>
Date: Fri, 3 Feb 2017 10:51:34 -0800
Subject: selftests, x86, protection_keys: fix uninitialized variable warning

'orig_pkru' might have been uninitialized here.  Fix it.

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/x86/protection_keys.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c
index 46f53ec502f6..bccc6f2a2f7e 100644
--- a/tools/testing/selftests/x86/protection_keys.c
+++ b/tools/testing/selftests/x86/protection_keys.c
@@ -462,7 +462,7 @@ void pkey_disable_set(int pkey, int flags)
 	unsigned long syscall_flags = 0;
 	int ret;
 	int pkey_rights;
-	u32 orig_pkru;
+	u32 orig_pkru = rdpkru();
 
 	dprintf1("START->%s(%d, 0x%x)\n", __func__,
 		pkey, flags);
-- 
cgit v1.2.3


From 2195bff041486eb7fcceaf058acaedcd057efbdc Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave.hansen@linux.intel.com>
Date: Fri, 3 Feb 2017 10:51:35 -0800
Subject: selftests, x86, protection_keys: fix wrong offset in siginfo

The siginfo contains a bunch of information about the fault.
For protection keys, it tells us which protection key's
permissions were violated.

The wrong offset in here leads to reading garbage and thus
failures in the tests.

We should probably eventually move this over to using the
kernel's headers defining the siginfo instead of a hard-coded
offset.  But, for now, just do the simplest fix.

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Shuah Khan <shuahkh@osg.samsung.com>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/x86/protection_keys.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c
index bccc6f2a2f7e..f39c5bc6e01a 100644
--- a/tools/testing/selftests/x86/protection_keys.c
+++ b/tools/testing/selftests/x86/protection_keys.c
@@ -192,7 +192,7 @@ void lots_o_noops_around_write(int *write_to_me)
 #define SYS_pkey_alloc	 381
 #define SYS_pkey_free	 382
 #define REG_IP_IDX REG_EIP
-#define si_pkey_offset 0x18
+#define si_pkey_offset 0x14
 #else
 #define SYS_mprotect_key 329
 #define SYS_pkey_alloc	 330
-- 
cgit v1.2.3


From 7f73f39a89c25c04ac684661ee61edcae476eb15 Mon Sep 17 00:00:00 2001
From: Mickaël Salaün <mic@digikod.net>
Date: Fri, 10 Feb 2017 00:21:36 +0100
Subject: bpf: Change the include directory for selftest
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use the tools include directory instead of the installed one to allow
builds from other kernels.

Signed-off-by: Mickaël Salaün <mic@digikod.net>
Cc: Alexei Starovoitov <ast@fb.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 769a6cb42b4b..c470c7301636 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -1,4 +1,4 @@
-CFLAGS += -Wall -O2 -I../../../../usr/include
+CFLAGS += -Wall -O2 -I../../../include/uapi
 
 test_objs = test_verifier test_tag test_maps test_lru_map test_lpm_map
 
-- 
cgit v1.2.3


From d02d8986a7688d3f0ff6ef61aa6beb41427692eb Mon Sep 17 00:00:00 2001
From: Mickaël Salaün <mic@digikod.net>
Date: Fri, 10 Feb 2017 00:21:37 +0100
Subject: bpf: Always test unprivileged programs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If selftests are run as root, then execute the unprivileged checks as
well. This switch from 243 to 368 tests.

The test numbers are suffixed with "/u" when executed as unprivileged or
with "/p" when executed as privileged.

The geteuid() check is replaced with a capability check.

Handling capabilities requires the libcap dependency.

Signed-off-by: Mickaël Salaün <mic@digikod.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/Makefile        |  2 +-
 tools/testing/selftests/bpf/test_verifier.c | 68 ++++++++++++++++++++++++++---
 2 files changed, 64 insertions(+), 6 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index c470c7301636..f3d65ad53494 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -1,4 +1,4 @@
-CFLAGS += -Wall -O2 -I../../../include/uapi
+CFLAGS += -Wall -O2 -lcap -I../../../include/uapi
 
 test_objs = test_verifier test_tag test_maps test_lru_map test_lpm_map
 
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 71f6407cde60..878bd60da376 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -16,6 +16,7 @@
 #include <stdbool.h>
 #include <sched.h>
 
+#include <sys/capability.h>
 #include <sys/resource.h>
 
 #include <linux/unistd.h>
@@ -4574,6 +4575,55 @@ fail_log:
 	goto close_fds;
 }
 
+static bool is_admin(void)
+{
+	cap_t caps;
+	cap_flag_value_t sysadmin = CAP_CLEAR;
+	const cap_value_t cap_val = CAP_SYS_ADMIN;
+
+	if (!CAP_IS_SUPPORTED(CAP_SETFCAP)) {
+		perror("cap_get_flag");
+		return false;
+	}
+	caps = cap_get_proc();
+	if (!caps) {
+		perror("cap_get_proc");
+		return false;
+	}
+	if (cap_get_flag(caps, cap_val, CAP_EFFECTIVE, &sysadmin))
+		perror("cap_get_flag");
+	if (cap_free(caps))
+		perror("cap_free");
+	return (sysadmin == CAP_SET);
+}
+
+static int set_admin(bool admin)
+{
+	cap_t caps;
+	const cap_value_t cap_val = CAP_SYS_ADMIN;
+	int ret = -1;
+
+	caps = cap_get_proc();
+	if (!caps) {
+		perror("cap_get_proc");
+		return -1;
+	}
+	if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_val,
+				admin ? CAP_SET : CAP_CLEAR)) {
+		perror("cap_set_flag");
+		goto out;
+	}
+	if (cap_set_proc(caps)) {
+		perror("cap_set_proc");
+		goto out;
+	}
+	ret = 0;
+out:
+	if (cap_free(caps))
+		perror("cap_free");
+	return ret;
+}
+
 static int do_test(bool unpriv, unsigned int from, unsigned int to)
 {
 	int i, passes = 0, errors = 0;
@@ -4584,11 +4634,19 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to)
 		/* Program types that are not supported by non-root we
 		 * skip right away.
 		 */
-		if (unpriv && test->prog_type)
-			continue;
+		if (!test->prog_type) {
+			if (!unpriv)
+				set_admin(false);
+			printf("#%d/u %s ", i, test->descr);
+			do_test_single(test, true, &passes, &errors);
+			if (!unpriv)
+				set_admin(true);
+		}
 
-		printf("#%d %s ", i, test->descr);
-		do_test_single(test, unpriv, &passes, &errors);
+		if (!unpriv) {
+			printf("#%d/p %s ", i, test->descr);
+			do_test_single(test, false, &passes, &errors);
+		}
 	}
 
 	printf("Summary: %d PASSED, %d FAILED\n", passes, errors);
@@ -4600,7 +4658,7 @@ int main(int argc, char **argv)
 	struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
 	struct rlimit rlim = { 1 << 20, 1 << 20 };
 	unsigned int from = 0, to = ARRAY_SIZE(tests);
-	bool unpriv = geteuid() != 0;
+	bool unpriv = !is_admin();
 
 	if (argc == 3) {
 		unsigned int l = atoi(argv[argc - 2]);
-- 
cgit v1.2.3


From 2ee89fb9a942e250b5adb5535de4acca14bb7fa2 Mon Sep 17 00:00:00 2001
From: Mickaël Salaün <mic@digikod.net>
Date: Fri, 10 Feb 2017 00:21:38 +0100
Subject: bpf: Use bpf_load_program() from the library
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace bpf_prog_load() with bpf_load_program() calls.

Signed-off-by: Mickaël Salaün <mic@digikod.net>
Cc: Alexei Starovoitov <ast@fb.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/lib/bpf/bpf.c                         |  6 +++---
 tools/lib/bpf/bpf.h                         |  4 ++--
 tools/testing/selftests/bpf/Makefile        |  4 +++-
 tools/testing/selftests/bpf/bpf_sys.h       | 21 ---------------------
 tools/testing/selftests/bpf/test_tag.c      |  6 ++++--
 tools/testing/selftests/bpf/test_verifier.c |  8 +++++---
 6 files changed, 17 insertions(+), 32 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 3ddb58a36d3c..58ce252073fa 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -42,7 +42,7 @@
 # endif
 #endif
 
-static __u64 ptr_to_u64(void *ptr)
+static __u64 ptr_to_u64(const void *ptr)
 {
 	return (__u64) (unsigned long) ptr;
 }
@@ -69,8 +69,8 @@ int bpf_create_map(enum bpf_map_type map_type, int key_size,
 	return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
 }
 
-int bpf_load_program(enum bpf_prog_type type, struct bpf_insn *insns,
-		     size_t insns_cnt, char *license,
+int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
+		     size_t insns_cnt, const char *license,
 		     __u32 kern_version, char *log_buf, size_t log_buf_sz)
 {
 	int fd;
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index a2f9853dd882..bc959a2de023 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -28,8 +28,8 @@ int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size,
 
 /* Recommend log buffer size */
 #define BPF_LOG_BUF_SIZE 65536
-int bpf_load_program(enum bpf_prog_type type, struct bpf_insn *insns,
-		     size_t insns_cnt, char *license,
+int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
+		     size_t insns_cnt, const char *license,
 		     __u32 kern_version, char *log_buf,
 		     size_t log_buf_sz);
 
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index f3d65ad53494..a35f564f66a1 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -1,4 +1,4 @@
-CFLAGS += -Wall -O2 -lcap -I../../../include/uapi
+CFLAGS += -Wall -O2 -lcap -I../../../include/uapi -I../../../lib
 
 test_objs = test_verifier test_tag test_maps test_lru_map test_lpm_map
 
@@ -7,6 +7,8 @@ TEST_FILES := $(test_objs)
 
 all: $(test_objs)
 
+$(test_objs): ../../../lib/bpf/bpf.o
+
 include ../lib.mk
 
 clean:
diff --git a/tools/testing/selftests/bpf/bpf_sys.h b/tools/testing/selftests/bpf/bpf_sys.h
index 6b4565f2a3f2..e7bbe3e5402e 100644
--- a/tools/testing/selftests/bpf/bpf_sys.h
+++ b/tools/testing/selftests/bpf/bpf_sys.h
@@ -84,25 +84,4 @@ static inline int bpf_map_create(enum bpf_map_type type, uint32_t size_key,
 	return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
 }
 
-static inline int bpf_prog_load(enum bpf_prog_type type,
-				const struct bpf_insn *insns, size_t size_insns,
-				const char *license, char *log, size_t size_log)
-{
-	union bpf_attr attr = {};
-
-	attr.prog_type = type;
-	attr.insns = bpf_ptr_to_u64(insns);
-	attr.insn_cnt = size_insns / sizeof(struct bpf_insn);
-	attr.license = bpf_ptr_to_u64(license);
-
-	if (size_log > 0) {
-		attr.log_buf = bpf_ptr_to_u64(log);
-		attr.log_size = size_log;
-		attr.log_level = 1;
-		log[0] = 0;
-	}
-
-	return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
-}
-
 #endif /* __BPF_SYS__ */
diff --git a/tools/testing/selftests/bpf/test_tag.c b/tools/testing/selftests/bpf/test_tag.c
index 5f7c602f47d1..dc209721ffd5 100644
--- a/tools/testing/selftests/bpf/test_tag.c
+++ b/tools/testing/selftests/bpf/test_tag.c
@@ -16,6 +16,8 @@
 #include <linux/bpf.h>
 #include <linux/if_alg.h>
 
+#include <bpf/bpf.h>
+
 #include "../../../include/linux/filter.h"
 
 #include "bpf_sys.h"
@@ -55,8 +57,8 @@ static int bpf_try_load_prog(int insns, int fd_map,
 	int fd_prog;
 
 	bpf_filler(insns, fd_map);
-	fd_prog = bpf_prog_load(BPF_PROG_TYPE_SCHED_CLS, prog, insns *
-				sizeof(struct bpf_insn), "", NULL, 0);
+	fd_prog = bpf_load_program(BPF_PROG_TYPE_SCHED_CLS, prog, insns, "", 0,
+				   NULL, 0);
 	assert(fd_prog > 0);
 	if (fd_map > 0)
 		bpf_filler(insns, 0);
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 878bd60da376..247830ecf68e 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -24,6 +24,8 @@
 #include <linux/bpf_perf_event.h>
 #include <linux/bpf.h>
 
+#include <bpf/bpf.h>
+
 #include "../../../include/linux/filter.h"
 
 #include "bpf_sys.h"
@@ -4535,9 +4537,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 
 	do_test_fixup(test, prog, &fd_f1, &fd_f2, &fd_f3);
 
-	fd_prog = bpf_prog_load(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER,
-				prog, prog_len * sizeof(struct bpf_insn),
-				"GPL", bpf_vlog, sizeof(bpf_vlog));
+	fd_prog = bpf_load_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER,
+				   prog, prog_len, "GPL", 0, bpf_vlog,
+				   sizeof(bpf_vlog));
 
 	expected_ret = unpriv && test->result_unpriv != UNDEF ?
 		       test->result_unpriv : test->result;
-- 
cgit v1.2.3


From 10ecc728fe12dbd206e2d4d8b6e96082632b969c Mon Sep 17 00:00:00 2001
From: Mickaël Salaün <mic@digikod.net>
Date: Fri, 10 Feb 2017 00:21:39 +0100
Subject: bpf: Use bpf_map_update_elem() from the library
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace bpf_map_update() with bpf_map_update_elem() calls.

Signed-off-by: Mickaël Salaün <mic@digikod.net>
Cc: Alexei Starovoitov <ast@fb.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/lib/bpf/bpf.c                        |  2 +-
 tools/lib/bpf/bpf.h                        |  2 +-
 tools/testing/selftests/bpf/bpf_sys.h      | 13 ----
 tools/testing/selftests/bpf/test_lpm_map.c | 15 ++---
 tools/testing/selftests/bpf/test_lru_map.c | 97 +++++++++++++++++-------------
 tools/testing/selftests/bpf/test_maps.c    | 61 ++++++++++---------
 6 files changed, 99 insertions(+), 91 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 58ce252073fa..1de762677a2f 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -98,7 +98,7 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
 	return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
 }
 
-int bpf_map_update_elem(int fd, void *key, void *value,
+int bpf_map_update_elem(int fd, const void *key, const void *value,
 			__u64 flags)
 {
 	union bpf_attr attr;
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index bc959a2de023..2458534c8b33 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -33,7 +33,7 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
 		     __u32 kern_version, char *log_buf,
 		     size_t log_buf_sz);
 
-int bpf_map_update_elem(int fd, void *key, void *value,
+int bpf_map_update_elem(int fd, const void *key, const void *value,
 			__u64 flags);
 
 int bpf_map_lookup_elem(int fd, void *key, void *value);
diff --git a/tools/testing/selftests/bpf/bpf_sys.h b/tools/testing/selftests/bpf/bpf_sys.h
index e7bbe3e5402e..e08dec0db9e0 100644
--- a/tools/testing/selftests/bpf/bpf_sys.h
+++ b/tools/testing/selftests/bpf/bpf_sys.h
@@ -35,19 +35,6 @@ static inline int bpf_map_lookup(int fd, const void *key, void *value)
 	return bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
 }
 
-static inline int bpf_map_update(int fd, const void *key, const void *value,
-				 uint64_t flags)
-{
-	union bpf_attr attr = {};
-
-	attr.map_fd = fd;
-	attr.key = bpf_ptr_to_u64(key);
-	attr.value = bpf_ptr_to_u64(value);
-	attr.flags = flags;
-
-	return bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
-}
-
 static inline int bpf_map_delete(int fd, const void *key)
 {
 	union bpf_attr attr = {};
diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c
index 26775c00273f..e29ffbcd2932 100644
--- a/tools/testing/selftests/bpf/test_lpm_map.c
+++ b/tools/testing/selftests/bpf/test_lpm_map.c
@@ -22,6 +22,7 @@
 #include <sys/time.h>
 #include <sys/resource.h>
 
+#include <bpf/bpf.h>
 #include "bpf_sys.h"
 #include "bpf_util.h"
 
@@ -198,7 +199,7 @@ static void test_lpm_map(int keysize)
 
 		key->prefixlen = value[keysize];
 		memcpy(key->data, value, keysize);
-		r = bpf_map_update(map, key, value, 0);
+		r = bpf_map_update_elem(map, key, value, 0);
 		assert(!r);
 	}
 
@@ -266,32 +267,32 @@ static void test_lpm_ipaddr(void)
 	value = 1;
 	key_ipv4->prefixlen = 16;
 	inet_pton(AF_INET, "192.168.0.0", key_ipv4->data);
-	assert(bpf_map_update(map_fd_ipv4, key_ipv4, &value, 0) == 0);
+	assert(bpf_map_update_elem(map_fd_ipv4, key_ipv4, &value, 0) == 0);
 
 	value = 2;
 	key_ipv4->prefixlen = 24;
 	inet_pton(AF_INET, "192.168.0.0", key_ipv4->data);
-	assert(bpf_map_update(map_fd_ipv4, key_ipv4, &value, 0) == 0);
+	assert(bpf_map_update_elem(map_fd_ipv4, key_ipv4, &value, 0) == 0);
 
 	value = 3;
 	key_ipv4->prefixlen = 24;
 	inet_pton(AF_INET, "192.168.128.0", key_ipv4->data);
-	assert(bpf_map_update(map_fd_ipv4, key_ipv4, &value, 0) == 0);
+	assert(bpf_map_update_elem(map_fd_ipv4, key_ipv4, &value, 0) == 0);
 
 	value = 5;
 	key_ipv4->prefixlen = 24;
 	inet_pton(AF_INET, "192.168.1.0", key_ipv4->data);
-	assert(bpf_map_update(map_fd_ipv4, key_ipv4, &value, 0) == 0);
+	assert(bpf_map_update_elem(map_fd_ipv4, key_ipv4, &value, 0) == 0);
 
 	value = 4;
 	key_ipv4->prefixlen = 23;
 	inet_pton(AF_INET, "192.168.0.0", key_ipv4->data);
-	assert(bpf_map_update(map_fd_ipv4, key_ipv4, &value, 0) == 0);
+	assert(bpf_map_update_elem(map_fd_ipv4, key_ipv4, &value, 0) == 0);
 
 	value = 0xdeadbeef;
 	key_ipv6->prefixlen = 64;
 	inet_pton(AF_INET6, "2a00:1450:4001:814::200e", key_ipv6->data);
-	assert(bpf_map_update(map_fd_ipv6, key_ipv6, &value, 0) == 0);
+	assert(bpf_map_update_elem(map_fd_ipv6, key_ipv6, &value, 0) == 0);
 
 	/* Set tprefixlen to maximum for lookups */
 	key_ipv4->prefixlen = 32;
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index 9f7bd1915c21..2f61b5817af4 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -18,6 +18,7 @@
 #include <sys/wait.h>
 #include <sys/resource.h>
 
+#include <bpf/bpf.h>
 #include "bpf_sys.h"
 #include "bpf_util.h"
 
@@ -119,15 +120,16 @@ static void test_lru_sanity0(int map_type, int map_flags)
 	/* insert key=1 element */
 
 	key = 1;
-	assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
-	assert(!bpf_map_update(expected_map_fd, &key, value, BPF_NOEXIST));
+	assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
+	assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+				    BPF_NOEXIST));
 
 	/* BPF_NOEXIST means: add new element if it doesn't exist */
-	assert(bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST) == -1 &&
+	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -1
 	       /* key=1 already exists */
-	       errno == EEXIST);
+	       && errno == EEXIST);
 
-	assert(bpf_map_update(lru_map_fd, &key, value, -1) == -1 &&
+	assert(bpf_map_update_elem(lru_map_fd, &key, value, -1) == -1 &&
 	       errno == EINVAL);
 
 	/* insert key=2 element */
@@ -138,11 +140,11 @@ static void test_lru_sanity0(int map_type, int map_flags)
 	       errno == ENOENT);
 
 	/* BPF_EXIST means: update existing element */
-	assert(bpf_map_update(lru_map_fd, &key, value, BPF_EXIST) == -1 &&
+	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -1 &&
 	       /* key=2 is not there */
 	       errno == ENOENT);
 
-	assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
+	assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
 
 	/* insert key=3 element */
 
@@ -159,8 +161,9 @@ static void test_lru_sanity0(int map_type, int map_flags)
 	assert(value[0] == 1234);
 
 	key = 3;
-	assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
-	assert(!bpf_map_update(expected_map_fd, &key, value, BPF_NOEXIST));
+	assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
+	assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+				    BPF_NOEXIST));
 
 	/* key=2 has been removed from the LRU */
 	key = 2;
@@ -217,14 +220,15 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
 	/* Insert 1 to tgt_free (+tgt_free keys) */
 	end_key = 1 + tgt_free;
 	for (key = 1; key < end_key; key++)
-		assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
+		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+					    BPF_NOEXIST));
 
 	/* Lookup 1 to tgt_free/2 */
 	end_key = 1 + batch_size;
 	for (key = 1; key < end_key; key++) {
 		assert(!bpf_map_lookup(lru_map_fd, &key, value));
-		assert(!bpf_map_update(expected_map_fd, &key, value,
-				       BPF_NOEXIST));
+		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+				            BPF_NOEXIST));
 	}
 
 	/* Insert 1+tgt_free to 2*tgt_free
@@ -234,9 +238,10 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
 	key = 1 + tgt_free;
 	end_key = key + tgt_free;
 	for (; key < end_key; key++) {
-		assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
-		assert(!bpf_map_update(expected_map_fd, &key, value,
-				       BPF_NOEXIST));
+		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+					    BPF_NOEXIST));
+		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+					    BPF_NOEXIST));
 	}
 
 	assert(map_equal(lru_map_fd, expected_map_fd));
@@ -301,9 +306,10 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
 	/* Insert 1 to tgt_free (+tgt_free keys) */
 	end_key = 1 + tgt_free;
 	for (key = 1; key < end_key; key++)
-		assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
+		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+					    BPF_NOEXIST));
 
-	/* Any bpf_map_update will require to acquire a new node
+	/* Any bpf_map_update_elem will require to acquire a new node
 	 * from LRU first.
 	 *
 	 * The local list is running out of free nodes.
@@ -316,10 +322,12 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
 	 */
 	key = 1;
 	if (map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
-		assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
+		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+					    BPF_NOEXIST));
 		assert(!bpf_map_delete(lru_map_fd, &key));
 	} else {
-		assert(bpf_map_update(lru_map_fd, &key, value, BPF_EXIST));
+		assert(bpf_map_update_elem(lru_map_fd, &key, value,
+					   BPF_EXIST));
 	}
 
 	/* Re-insert 1 to tgt_free/2 again and do a lookup
@@ -329,11 +337,12 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
 	value[0] = 4321;
 	for (key = 1; key < end_key; key++) {
 		assert(bpf_map_lookup(lru_map_fd, &key, value));
-		assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
+		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+					    BPF_NOEXIST));
 		assert(!bpf_map_lookup(lru_map_fd, &key, value));
 		assert(value[0] == 4321);
-		assert(!bpf_map_update(expected_map_fd, &key, value,
-				       BPF_NOEXIST));
+		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+				            BPF_NOEXIST));
 	}
 
 	value[0] = 1234;
@@ -344,14 +353,16 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
 		/* These newly added but not referenced keys will be
 		 * gone during the next LRU shrink.
 		 */
-		assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
+		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+					    BPF_NOEXIST));
 
 	/* Insert 1+tgt_free*3/2 to  tgt_free*5/2 */
 	end_key = key + tgt_free;
 	for (; key < end_key; key++) {
-		assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
-		assert(!bpf_map_update(expected_map_fd, &key, value,
-				       BPF_NOEXIST));
+		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+					    BPF_NOEXIST));
+		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+				            BPF_NOEXIST));
 	}
 
 	assert(map_equal(lru_map_fd, expected_map_fd));
@@ -401,14 +412,15 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free)
 	/* Insert 1 to 2*tgt_free (+2*tgt_free keys) */
 	end_key = 1 + (2 * tgt_free);
 	for (key = 1; key < end_key; key++)
-		assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
+		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+					    BPF_NOEXIST));
 
 	/* Lookup key 1 to tgt_free*3/2 */
 	end_key = tgt_free + batch_size;
 	for (key = 1; key < end_key; key++) {
 		assert(!bpf_map_lookup(lru_map_fd, &key, value));
-		assert(!bpf_map_update(expected_map_fd, &key, value,
-				       BPF_NOEXIST));
+		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+				            BPF_NOEXIST));
 	}
 
 	/* Add 1+2*tgt_free to tgt_free*5/2
@@ -417,9 +429,10 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free)
 	key = 2 * tgt_free + 1;
 	end_key = key + batch_size;
 	for (; key < end_key; key++) {
-		assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
-		assert(!bpf_map_update(expected_map_fd, &key, value,
-				       BPF_NOEXIST));
+		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+					    BPF_NOEXIST));
+		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+				            BPF_NOEXIST));
 	}
 
 	assert(map_equal(lru_map_fd, expected_map_fd));
@@ -457,15 +470,16 @@ static void test_lru_sanity4(int map_type, int map_flags, unsigned int tgt_free)
 	value[0] = 1234;
 
 	for (key = 1; key <= 2 * tgt_free; key++)
-		assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
+		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+					    BPF_NOEXIST));
 
 	key = 1;
-	assert(bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
+	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
 
 	for (key = 1; key <= tgt_free; key++) {
 		assert(!bpf_map_lookup(lru_map_fd, &key, value));
-		assert(!bpf_map_update(expected_map_fd, &key, value,
-				       BPF_NOEXIST));
+		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+					    BPF_NOEXIST));
 	}
 
 	for (; key <= 2 * tgt_free; key++) {
@@ -475,9 +489,10 @@ static void test_lru_sanity4(int map_type, int map_flags, unsigned int tgt_free)
 
 	end_key = key + 2 * tgt_free;
 	for (; key < end_key; key++) {
-		assert(!bpf_map_update(lru_map_fd, &key, value, BPF_NOEXIST));
-		assert(!bpf_map_update(expected_map_fd, &key, value,
-				       BPF_NOEXIST));
+		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+					    BPF_NOEXIST));
+		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+				            BPF_NOEXIST));
 	}
 
 	assert(map_equal(lru_map_fd, expected_map_fd));
@@ -498,7 +513,7 @@ static void do_test_lru_sanity5(unsigned long long last_key, int map_fd)
 	value[0] = 1234;
 
 	key = last_key + 1;
-	assert(!bpf_map_update(map_fd, &key, value, BPF_NOEXIST));
+	assert(!bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST));
 	assert(!bpf_map_lookup(map_fd, &key, value));
 
 	/* Cannot find the last key because it was removed by LRU */
@@ -523,7 +538,7 @@ static void test_lru_sanity5(int map_type, int map_flags)
 
 	value[0] = 1234;
 	key = 0;
-	assert(!bpf_map_update(map_fd, &key, value, BPF_NOEXIST));
+	assert(!bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST));
 
 	while (sched_next_online(0, &next_cpu) != -1) {
 		pid_t pid;
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index eedfef8d2946..d4b9ba6d6a0e 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -21,6 +21,7 @@
 
 #include <linux/bpf.h>
 
+#include <bpf/bpf.h>
 #include "bpf_sys.h"
 #include "bpf_util.h"
 
@@ -41,16 +42,17 @@ static void test_hashmap(int task, void *data)
 	key = 1;
 	value = 1234;
 	/* Insert key=1 element. */
-	assert(bpf_map_update(fd, &key, &value, BPF_ANY) == 0);
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
 
 	value = 0;
 	/* BPF_NOEXIST means add new element if it doesn't exist. */
-	assert(bpf_map_update(fd, &key, &value, BPF_NOEXIST) == -1 &&
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
 	       /* key=1 already exists. */
 	       errno == EEXIST);
 
 	/* -1 is an invalid flag. */
-	assert(bpf_map_update(fd, &key, &value, -1) == -1 && errno == EINVAL);
+	assert(bpf_map_update_elem(fd, &key, &value, -1) == -1 &&
+	       errno == EINVAL);
 
 	/* Check that key=1 can be found. */
 	assert(bpf_map_lookup(fd, &key, &value) == 0 && value == 1234);
@@ -60,27 +62,27 @@ static void test_hashmap(int task, void *data)
 	assert(bpf_map_lookup(fd, &key, &value) == -1 && errno == ENOENT);
 
 	/* BPF_EXIST means update existing element. */
-	assert(bpf_map_update(fd, &key, &value, BPF_EXIST) == -1 &&
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == -1 &&
 	       /* key=2 is not there. */
 	       errno == ENOENT);
 
 	/* Insert key=2 element. */
-	assert(bpf_map_update(fd, &key, &value, BPF_NOEXIST) == 0);
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == 0);
 
 	/* key=1 and key=2 were inserted, check that key=0 cannot be
 	 * inserted due to max_entries limit.
 	 */
 	key = 0;
-	assert(bpf_map_update(fd, &key, &value, BPF_NOEXIST) == -1 &&
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
 	       errno == E2BIG);
 
 	/* Update existing element, though the map is full. */
 	key = 1;
-	assert(bpf_map_update(fd, &key, &value, BPF_EXIST) == 0);
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == 0);
 	key = 2;
-	assert(bpf_map_update(fd, &key, &value, BPF_ANY) == 0);
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
 	key = 1;
-	assert(bpf_map_update(fd, &key, &value, BPF_ANY) == 0);
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
 
 	/* Check that key = 0 doesn't exist. */
 	key = 0;
@@ -130,16 +132,17 @@ static void test_hashmap_percpu(int task, void *data)
 	key = 1;
 	/* Insert key=1 element. */
 	assert(!(expected_key_mask & key));
-	assert(bpf_map_update(fd, &key, value, BPF_ANY) == 0);
+	assert(bpf_map_update_elem(fd, &key, value, BPF_ANY) == 0);
 	expected_key_mask |= key;
 
 	/* BPF_NOEXIST means add new element if it doesn't exist. */
-	assert(bpf_map_update(fd, &key, value, BPF_NOEXIST) == -1 &&
+	assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == -1 &&
 	       /* key=1 already exists. */
 	       errno == EEXIST);
 
 	/* -1 is an invalid flag. */
-	assert(bpf_map_update(fd, &key, value, -1) == -1 && errno == EINVAL);
+	assert(bpf_map_update_elem(fd, &key, value, -1) == -1 &&
+	       errno == EINVAL);
 
 	/* Check that key=1 can be found. Value could be 0 if the lookup
 	 * was run from a different CPU.
@@ -152,20 +155,20 @@ static void test_hashmap_percpu(int task, void *data)
 	assert(bpf_map_lookup(fd, &key, value) == -1 && errno == ENOENT);
 
 	/* BPF_EXIST means update existing element. */
-	assert(bpf_map_update(fd, &key, value, BPF_EXIST) == -1 &&
+	assert(bpf_map_update_elem(fd, &key, value, BPF_EXIST) == -1 &&
 	       /* key=2 is not there. */
 	       errno == ENOENT);
 
 	/* Insert key=2 element. */
 	assert(!(expected_key_mask & key));
-	assert(bpf_map_update(fd, &key, value, BPF_NOEXIST) == 0);
+	assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == 0);
 	expected_key_mask |= key;
 
 	/* key=1 and key=2 were inserted, check that key=0 cannot be
 	 * inserted due to max_entries limit.
 	 */
 	key = 0;
-	assert(bpf_map_update(fd, &key, value, BPF_NOEXIST) == -1 &&
+	assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == -1 &&
 	       errno == E2BIG);
 
 	/* Check that key = 0 doesn't exist. */
@@ -187,7 +190,7 @@ static void test_hashmap_percpu(int task, void *data)
 
 	/* Update with BPF_EXIST. */
 	key = 1;
-	assert(bpf_map_update(fd, &key, value, BPF_EXIST) == 0);
+	assert(bpf_map_update_elem(fd, &key, value, BPF_EXIST) == 0);
 
 	/* Delete both elements. */
 	key = 1;
@@ -219,10 +222,10 @@ static void test_arraymap(int task, void *data)
 	key = 1;
 	value = 1234;
 	/* Insert key=1 element. */
-	assert(bpf_map_update(fd, &key, &value, BPF_ANY) == 0);
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
 
 	value = 0;
-	assert(bpf_map_update(fd, &key, &value, BPF_NOEXIST) == -1 &&
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
 	       errno == EEXIST);
 
 	/* Check that key=1 can be found. */
@@ -236,7 +239,7 @@ static void test_arraymap(int task, void *data)
 	 * due to max_entries limit.
 	 */
 	key = 2;
-	assert(bpf_map_update(fd, &key, &value, BPF_EXIST) == -1 &&
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == -1 &&
 	       errno == E2BIG);
 
 	/* Check that key = 2 doesn't exist. */
@@ -275,10 +278,10 @@ static void test_arraymap_percpu(int task, void *data)
 
 	key = 1;
 	/* Insert key=1 element. */
-	assert(bpf_map_update(fd, &key, values, BPF_ANY) == 0);
+	assert(bpf_map_update_elem(fd, &key, values, BPF_ANY) == 0);
 
 	values[0] = 0;
-	assert(bpf_map_update(fd, &key, values, BPF_NOEXIST) == -1 &&
+	assert(bpf_map_update_elem(fd, &key, values, BPF_NOEXIST) == -1 &&
 	       errno == EEXIST);
 
 	/* Check that key=1 can be found. */
@@ -291,7 +294,7 @@ static void test_arraymap_percpu(int task, void *data)
 
 	/* Check that key=2 cannot be inserted due to max_entries limit. */
 	key = 2;
-	assert(bpf_map_update(fd, &key, values, BPF_EXIST) == -1 &&
+	assert(bpf_map_update_elem(fd, &key, values, BPF_EXIST) == -1 &&
 	       errno == E2BIG);
 
 	/* Check that key = 2 doesn't exist. */
@@ -331,7 +334,7 @@ static void test_arraymap_percpu_many_keys(void)
 		values[i] = i + 10;
 
 	for (key = 0; key < nr_keys; key++)
-		assert(bpf_map_update(fd, &key, values, BPF_ANY) == 0);
+		assert(bpf_map_update_elem(fd, &key, values, BPF_ANY) == 0);
 
 	for (key = 0; key < nr_keys; key++) {
 		for (i = 0; i < nr_cpus; i++)
@@ -368,11 +371,11 @@ static void test_map_large(void)
 		key = (struct bigkey) { .c = i };
 		value = i;
 
-		assert(bpf_map_update(fd, &key, &value, BPF_NOEXIST) == 0);
+		assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == 0);
 	}
 
 	key.c = -1;
-	assert(bpf_map_update(fd, &key, &value, BPF_NOEXIST) == -1 &&
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
 	       errno == E2BIG);
 
 	/* Iterate through all elements. */
@@ -437,8 +440,10 @@ static void do_work(int fn, void *data)
 		key = value = i;
 
 		if (do_update) {
-			assert(bpf_map_update(fd, &key, &value, BPF_NOEXIST) == 0);
-			assert(bpf_map_update(fd, &key, &value, BPF_EXIST) == 0);
+			assert(bpf_map_update_elem(fd, &key, &value,
+						   BPF_NOEXIST) == 0);
+			assert(bpf_map_update_elem(fd, &key, &value,
+						   BPF_EXIST) == 0);
 		} else {
 			assert(bpf_map_delete(fd, &key) == 0);
 		}
@@ -468,7 +473,7 @@ static void test_map_parallel(void)
 	run_parallel(TASKS, do_work, data);
 
 	/* Check that key=0 is already there. */
-	assert(bpf_map_update(fd, &key, &value, BPF_NOEXIST) == -1 &&
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
 	       errno == EEXIST);
 
 	/* Check that all elements were inserted. */
-- 
cgit v1.2.3


From e5ff7c4019c6cb6e86bc9d6d16e8a8f921133c70 Mon Sep 17 00:00:00 2001
From: Mickaël Salaün <mic@digikod.net>
Date: Fri, 10 Feb 2017 00:21:40 +0100
Subject: bpf: Use bpf_map_lookup_elem() from the library
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace bpf_map_lookup() with bpf_map_lookup_elem() calls.

Signed-off-by: Mickaël Salaün <mic@digikod.net>
Cc: Alexei Starovoitov <ast@fb.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/lib/bpf/bpf.c                        |  2 +-
 tools/lib/bpf/bpf.h                        |  2 +-
 tools/testing/selftests/bpf/bpf_sys.h      | 11 -----------
 tools/testing/selftests/bpf/test_lpm_map.c | 16 ++++++++--------
 tools/testing/selftests/bpf/test_lru_map.c | 28 ++++++++++++++--------------
 tools/testing/selftests/bpf/test_maps.c    | 30 +++++++++++++++---------------
 6 files changed, 39 insertions(+), 50 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 1de762677a2f..b1a1f58b99e0 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -112,7 +112,7 @@ int bpf_map_update_elem(int fd, const void *key, const void *value,
 	return sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
 }
 
-int bpf_map_lookup_elem(int fd, void *key, void *value)
+int bpf_map_lookup_elem(int fd, const void *key, void *value)
 {
 	union bpf_attr attr;
 
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 2458534c8b33..171cf594f782 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -36,7 +36,7 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
 int bpf_map_update_elem(int fd, const void *key, const void *value,
 			__u64 flags);
 
-int bpf_map_lookup_elem(int fd, void *key, void *value);
+int bpf_map_lookup_elem(int fd, const void *key, void *value);
 int bpf_map_delete_elem(int fd, void *key);
 int bpf_map_get_next_key(int fd, void *key, void *next_key);
 int bpf_obj_pin(int fd, const char *pathname);
diff --git a/tools/testing/selftests/bpf/bpf_sys.h b/tools/testing/selftests/bpf/bpf_sys.h
index e08dec0db9e0..0a5a6060db70 100644
--- a/tools/testing/selftests/bpf/bpf_sys.h
+++ b/tools/testing/selftests/bpf/bpf_sys.h
@@ -24,17 +24,6 @@ static inline int bpf(int cmd, union bpf_attr *attr, unsigned int size)
 #endif
 }
 
-static inline int bpf_map_lookup(int fd, const void *key, void *value)
-{
-	union bpf_attr attr = {};
-
-	attr.map_fd = fd;
-	attr.key = bpf_ptr_to_u64(key);
-	attr.value = bpf_ptr_to_u64(value);
-
-	return bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
-}
-
 static inline int bpf_map_delete(int fd, const void *key)
 {
 	union bpf_attr attr = {};
diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c
index e29ffbcd2932..bd08394c26cb 100644
--- a/tools/testing/selftests/bpf/test_lpm_map.c
+++ b/tools/testing/selftests/bpf/test_lpm_map.c
@@ -211,7 +211,7 @@ static void test_lpm_map(int keysize)
 
 		key->prefixlen = 8 * keysize;
 		memcpy(key->data, data, keysize);
-		r = bpf_map_lookup(map, key, value);
+		r = bpf_map_lookup_elem(map, key, value);
 		assert(!r || errno == ENOENT);
 		assert(!t == !!r);
 
@@ -300,32 +300,32 @@ static void test_lpm_ipaddr(void)
 
 	/* Test some lookups that should come back with a value */
 	inet_pton(AF_INET, "192.168.128.23", key_ipv4->data);
-	assert(bpf_map_lookup(map_fd_ipv4, key_ipv4, &value) == 0);
+	assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == 0);
 	assert(value == 3);
 
 	inet_pton(AF_INET, "192.168.0.1", key_ipv4->data);
-	assert(bpf_map_lookup(map_fd_ipv4, key_ipv4, &value) == 0);
+	assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == 0);
 	assert(value == 2);
 
 	inet_pton(AF_INET6, "2a00:1450:4001:814::", key_ipv6->data);
-	assert(bpf_map_lookup(map_fd_ipv6, key_ipv6, &value) == 0);
+	assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == 0);
 	assert(value == 0xdeadbeef);
 
 	inet_pton(AF_INET6, "2a00:1450:4001:814::1", key_ipv6->data);
-	assert(bpf_map_lookup(map_fd_ipv6, key_ipv6, &value) == 0);
+	assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == 0);
 	assert(value == 0xdeadbeef);
 
 	/* Test some lookups that should not match any entry */
 	inet_pton(AF_INET, "10.0.0.1", key_ipv4->data);
-	assert(bpf_map_lookup(map_fd_ipv4, key_ipv4, &value) == -1 &&
+	assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -1 &&
 	       errno == ENOENT);
 
 	inet_pton(AF_INET, "11.11.11.11", key_ipv4->data);
-	assert(bpf_map_lookup(map_fd_ipv4, key_ipv4, &value) == -1 &&
+	assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -1 &&
 	       errno == ENOENT);
 
 	inet_pton(AF_INET6, "2a00:ffff::", key_ipv6->data);
-	assert(bpf_map_lookup(map_fd_ipv6, key_ipv6, &value) == -1 &&
+	assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == -1 &&
 	       errno == ENOENT);
 
 	close(map_fd_ipv4);
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index 2f61b5817af4..eccf6d96e551 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -47,8 +47,8 @@ static int map_subset(int map0, int map1)
 	int ret;
 
 	while (!bpf_map_next_key(map1, &next_key, &next_key)) {
-		assert(!bpf_map_lookup(map1, &next_key, value1));
-		ret = bpf_map_lookup(map0, &next_key, value0);
+		assert(!bpf_map_lookup_elem(map1, &next_key, value1));
+		ret = bpf_map_lookup_elem(map0, &next_key, value0);
 		if (ret) {
 			printf("key:%llu not found from map. %s(%d)\n",
 			       next_key, strerror(errno), errno);
@@ -136,7 +136,7 @@ static void test_lru_sanity0(int map_type, int map_flags)
 
 	/* check that key=2 is not found */
 	key = 2;
-	assert(bpf_map_lookup(lru_map_fd, &key, value) == -1 &&
+	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
 	       errno == ENOENT);
 
 	/* BPF_EXIST means: update existing element */
@@ -150,14 +150,14 @@ static void test_lru_sanity0(int map_type, int map_flags)
 
 	/* check that key=3 is not found */
 	key = 3;
-	assert(bpf_map_lookup(lru_map_fd, &key, value) == -1 &&
+	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
 	       errno == ENOENT);
 
 	/* check that key=1 can be found and mark the ref bit to
 	 * stop LRU from removing key=1
 	 */
 	key = 1;
-	assert(!bpf_map_lookup(lru_map_fd, &key, value));
+	assert(!bpf_map_lookup_elem(lru_map_fd, &key, value));
 	assert(value[0] == 1234);
 
 	key = 3;
@@ -167,7 +167,7 @@ static void test_lru_sanity0(int map_type, int map_flags)
 
 	/* key=2 has been removed from the LRU */
 	key = 2;
-	assert(bpf_map_lookup(lru_map_fd, &key, value) == -1);
+	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1);
 
 	assert(map_equal(lru_map_fd, expected_map_fd));
 
@@ -226,7 +226,7 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
 	/* Lookup 1 to tgt_free/2 */
 	end_key = 1 + batch_size;
 	for (key = 1; key < end_key; key++) {
-		assert(!bpf_map_lookup(lru_map_fd, &key, value));
+		assert(!bpf_map_lookup_elem(lru_map_fd, &key, value));
 		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
 				            BPF_NOEXIST));
 	}
@@ -336,10 +336,10 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
 	end_key = 1 + batch_size;
 	value[0] = 4321;
 	for (key = 1; key < end_key; key++) {
-		assert(bpf_map_lookup(lru_map_fd, &key, value));
+		assert(bpf_map_lookup_elem(lru_map_fd, &key, value));
 		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
 					    BPF_NOEXIST));
-		assert(!bpf_map_lookup(lru_map_fd, &key, value));
+		assert(!bpf_map_lookup_elem(lru_map_fd, &key, value));
 		assert(value[0] == 4321);
 		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
 				            BPF_NOEXIST));
@@ -418,7 +418,7 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free)
 	/* Lookup key 1 to tgt_free*3/2 */
 	end_key = tgt_free + batch_size;
 	for (key = 1; key < end_key; key++) {
-		assert(!bpf_map_lookup(lru_map_fd, &key, value));
+		assert(!bpf_map_lookup_elem(lru_map_fd, &key, value));
 		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
 				            BPF_NOEXIST));
 	}
@@ -477,7 +477,7 @@ static void test_lru_sanity4(int map_type, int map_flags, unsigned int tgt_free)
 	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
 
 	for (key = 1; key <= tgt_free; key++) {
-		assert(!bpf_map_lookup(lru_map_fd, &key, value));
+		assert(!bpf_map_lookup_elem(lru_map_fd, &key, value));
 		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
 					    BPF_NOEXIST));
 	}
@@ -508,16 +508,16 @@ static void do_test_lru_sanity5(unsigned long long last_key, int map_fd)
 	unsigned long long key, value[nr_cpus];
 
 	/* Ensure the last key inserted by previous CPU can be found */
-	assert(!bpf_map_lookup(map_fd, &last_key, value));
+	assert(!bpf_map_lookup_elem(map_fd, &last_key, value));
 
 	value[0] = 1234;
 
 	key = last_key + 1;
 	assert(!bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST));
-	assert(!bpf_map_lookup(map_fd, &key, value));
+	assert(!bpf_map_lookup_elem(map_fd, &key, value));
 
 	/* Cannot find the last key because it was removed by LRU */
-	assert(bpf_map_lookup(map_fd, &last_key, value));
+	assert(bpf_map_lookup_elem(map_fd, &last_key, value));
 }
 
 /* Test map with only one element */
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index d4b9ba6d6a0e..5db1a939af69 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -55,11 +55,11 @@ static void test_hashmap(int task, void *data)
 	       errno == EINVAL);
 
 	/* Check that key=1 can be found. */
-	assert(bpf_map_lookup(fd, &key, &value) == 0 && value == 1234);
+	assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 1234);
 
 	key = 2;
 	/* Check that key=2 is not found. */
-	assert(bpf_map_lookup(fd, &key, &value) == -1 && errno == ENOENT);
+	assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
 
 	/* BPF_EXIST means update existing element. */
 	assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == -1 &&
@@ -148,11 +148,11 @@ static void test_hashmap_percpu(int task, void *data)
 	 * was run from a different CPU.
 	 */
 	value[0] = 1;
-	assert(bpf_map_lookup(fd, &key, value) == 0 && value[0] == 100);
+	assert(bpf_map_lookup_elem(fd, &key, value) == 0 && value[0] == 100);
 
 	key = 2;
 	/* Check that key=2 is not found. */
-	assert(bpf_map_lookup(fd, &key, value) == -1 && errno == ENOENT);
+	assert(bpf_map_lookup_elem(fd, &key, value) == -1 && errno == ENOENT);
 
 	/* BPF_EXIST means update existing element. */
 	assert(bpf_map_update_elem(fd, &key, value, BPF_EXIST) == -1 &&
@@ -179,7 +179,7 @@ static void test_hashmap_percpu(int task, void *data)
 		assert((expected_key_mask & next_key) == next_key);
 		expected_key_mask &= ~next_key;
 
-		assert(bpf_map_lookup(fd, &next_key, value) == 0);
+		assert(bpf_map_lookup_elem(fd, &next_key, value) == 0);
 
 		for (i = 0; i < nr_cpus; i++)
 			assert(value[i] == i + 100);
@@ -229,11 +229,11 @@ static void test_arraymap(int task, void *data)
 	       errno == EEXIST);
 
 	/* Check that key=1 can be found. */
-	assert(bpf_map_lookup(fd, &key, &value) == 0 && value == 1234);
+	assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 1234);
 
 	key = 0;
 	/* Check that key=0 is also found and zero initialized. */
-	assert(bpf_map_lookup(fd, &key, &value) == 0 && value == 0);
+	assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 0);
 
 	/* key=0 and key=1 were inserted, check that key=2 cannot be inserted
 	 * due to max_entries limit.
@@ -243,7 +243,7 @@ static void test_arraymap(int task, void *data)
 	       errno == E2BIG);
 
 	/* Check that key = 2 doesn't exist. */
-	assert(bpf_map_lookup(fd, &key, &value) == -1 && errno == ENOENT);
+	assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
 
 	/* Iterate over two elements. */
 	assert(bpf_map_next_key(fd, &key, &next_key) == 0 &&
@@ -285,11 +285,11 @@ static void test_arraymap_percpu(int task, void *data)
 	       errno == EEXIST);
 
 	/* Check that key=1 can be found. */
-	assert(bpf_map_lookup(fd, &key, values) == 0 && values[0] == 100);
+	assert(bpf_map_lookup_elem(fd, &key, values) == 0 && values[0] == 100);
 
 	key = 0;
 	/* Check that key=0 is also found and zero initialized. */
-	assert(bpf_map_lookup(fd, &key, values) == 0 &&
+	assert(bpf_map_lookup_elem(fd, &key, values) == 0 &&
 	       values[0] == 0 && values[nr_cpus - 1] == 0);
 
 	/* Check that key=2 cannot be inserted due to max_entries limit. */
@@ -298,7 +298,7 @@ static void test_arraymap_percpu(int task, void *data)
 	       errno == E2BIG);
 
 	/* Check that key = 2 doesn't exist. */
-	assert(bpf_map_lookup(fd, &key, values) == -1 && errno == ENOENT);
+	assert(bpf_map_lookup_elem(fd, &key, values) == -1 && errno == ENOENT);
 
 	/* Iterate over two elements. */
 	assert(bpf_map_next_key(fd, &key, &next_key) == 0 &&
@@ -340,7 +340,7 @@ static void test_arraymap_percpu_many_keys(void)
 		for (i = 0; i < nr_cpus; i++)
 			values[i] = 0;
 
-		assert(bpf_map_lookup(fd, &key, values) == 0);
+		assert(bpf_map_lookup_elem(fd, &key, values) == 0);
 
 		for (i = 0; i < nr_cpus; i++)
 			assert(values[i] == i + 10);
@@ -384,9 +384,9 @@ static void test_map_large(void)
 	assert(bpf_map_next_key(fd, &key, &key) == -1 && errno == ENOENT);
 
 	key.c = 0;
-	assert(bpf_map_lookup(fd, &key, &value) == 0 && value == 0);
+	assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 0);
 	key.a = 1;
-	assert(bpf_map_lookup(fd, &key, &value) == -1 && errno == ENOENT);
+	assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
 
 	close(fd);
 }
@@ -486,7 +486,7 @@ static void test_map_parallel(void)
 	for (i = 0; i < MAP_SIZE; i++) {
 		key = MAP_SIZE - i - 1;
 
-		assert(bpf_map_lookup(fd, &key, &value) == 0 &&
+		assert(bpf_map_lookup_elem(fd, &key, &value) == 0 &&
 		       value == key);
 	}
 
-- 
cgit v1.2.3


From e58383b803499bd623b737070038af94d0b8a3c7 Mon Sep 17 00:00:00 2001
From: Mickaël Salaün <mic@digikod.net>
Date: Fri, 10 Feb 2017 00:21:41 +0100
Subject: bpf: Use bpf_map_delete_elem() from the library
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace bpf_map_delete() with bpf_map_delete_elem() calls.

Signed-off-by: Mickaël Salaün <mic@digikod.net>
Cc: Alexei Starovoitov <ast@fb.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/lib/bpf/bpf.c                        |  2 +-
 tools/lib/bpf/bpf.h                        |  2 +-
 tools/testing/selftests/bpf/bpf_sys.h      | 10 ----------
 tools/testing/selftests/bpf/test_lru_map.c |  6 +++---
 tools/testing/selftests/bpf/test_maps.c    | 22 +++++++++++-----------
 5 files changed, 16 insertions(+), 26 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index b1a1f58b99e0..eab8c6bfbf8f 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -124,7 +124,7 @@ int bpf_map_lookup_elem(int fd, const void *key, void *value)
 	return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
 }
 
-int bpf_map_delete_elem(int fd, void *key)
+int bpf_map_delete_elem(int fd, const void *key)
 {
 	union bpf_attr attr;
 
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 171cf594f782..f559f648db45 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -37,7 +37,7 @@ int bpf_map_update_elem(int fd, const void *key, const void *value,
 			__u64 flags);
 
 int bpf_map_lookup_elem(int fd, const void *key, void *value);
-int bpf_map_delete_elem(int fd, void *key);
+int bpf_map_delete_elem(int fd, const void *key);
 int bpf_map_get_next_key(int fd, void *key, void *next_key);
 int bpf_obj_pin(int fd, const char *pathname);
 int bpf_obj_get(const char *pathname);
diff --git a/tools/testing/selftests/bpf/bpf_sys.h b/tools/testing/selftests/bpf/bpf_sys.h
index 0a5a6060db70..17581a42e1d9 100644
--- a/tools/testing/selftests/bpf/bpf_sys.h
+++ b/tools/testing/selftests/bpf/bpf_sys.h
@@ -24,16 +24,6 @@ static inline int bpf(int cmd, union bpf_attr *attr, unsigned int size)
 #endif
 }
 
-static inline int bpf_map_delete(int fd, const void *key)
-{
-	union bpf_attr attr = {};
-
-	attr.map_fd = fd;
-	attr.key = bpf_ptr_to_u64(key);
-
-	return bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
-}
-
 static inline int bpf_map_next_key(int fd, const void *key, void *next_key)
 {
 	union bpf_attr attr = {};
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index eccf6d96e551..859c940a6e41 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -324,7 +324,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
 	if (map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
 		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
 					    BPF_NOEXIST));
-		assert(!bpf_map_delete(lru_map_fd, &key));
+		assert(!bpf_map_delete_elem(lru_map_fd, &key));
 	} else {
 		assert(bpf_map_update_elem(lru_map_fd, &key, value,
 					   BPF_EXIST));
@@ -483,8 +483,8 @@ static void test_lru_sanity4(int map_type, int map_flags, unsigned int tgt_free)
 	}
 
 	for (; key <= 2 * tgt_free; key++) {
-		assert(!bpf_map_delete(lru_map_fd, &key));
-		assert(bpf_map_delete(lru_map_fd, &key));
+		assert(!bpf_map_delete_elem(lru_map_fd, &key));
+		assert(bpf_map_delete_elem(lru_map_fd, &key));
 	}
 
 	end_key = key + 2 * tgt_free;
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 5db1a939af69..0f9f90455375 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -86,7 +86,7 @@ static void test_hashmap(int task, void *data)
 
 	/* Check that key = 0 doesn't exist. */
 	key = 0;
-	assert(bpf_map_delete(fd, &key) == -1 && errno == ENOENT);
+	assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
 
 	/* Iterate over two elements. */
 	assert(bpf_map_next_key(fd, &key, &next_key) == 0 &&
@@ -98,10 +98,10 @@ static void test_hashmap(int task, void *data)
 
 	/* Delete both elements. */
 	key = 1;
-	assert(bpf_map_delete(fd, &key) == 0);
+	assert(bpf_map_delete_elem(fd, &key) == 0);
 	key = 2;
-	assert(bpf_map_delete(fd, &key) == 0);
-	assert(bpf_map_delete(fd, &key) == -1 && errno == ENOENT);
+	assert(bpf_map_delete_elem(fd, &key) == 0);
+	assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
 
 	key = 0;
 	/* Check that map is empty. */
@@ -172,7 +172,7 @@ static void test_hashmap_percpu(int task, void *data)
 	       errno == E2BIG);
 
 	/* Check that key = 0 doesn't exist. */
-	assert(bpf_map_delete(fd, &key) == -1 && errno == ENOENT);
+	assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
 
 	/* Iterate over two elements. */
 	while (!bpf_map_next_key(fd, &key, &next_key)) {
@@ -194,10 +194,10 @@ static void test_hashmap_percpu(int task, void *data)
 
 	/* Delete both elements. */
 	key = 1;
-	assert(bpf_map_delete(fd, &key) == 0);
+	assert(bpf_map_delete_elem(fd, &key) == 0);
 	key = 2;
-	assert(bpf_map_delete(fd, &key) == 0);
-	assert(bpf_map_delete(fd, &key) == -1 && errno == ENOENT);
+	assert(bpf_map_delete_elem(fd, &key) == 0);
+	assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
 
 	key = 0;
 	/* Check that map is empty. */
@@ -255,7 +255,7 @@ static void test_arraymap(int task, void *data)
 
 	/* Delete shouldn't succeed. */
 	key = 1;
-	assert(bpf_map_delete(fd, &key) == -1 && errno == EINVAL);
+	assert(bpf_map_delete_elem(fd, &key) == -1 && errno == EINVAL);
 
 	close(fd);
 }
@@ -310,7 +310,7 @@ static void test_arraymap_percpu(int task, void *data)
 
 	/* Delete shouldn't succeed. */
 	key = 1;
-	assert(bpf_map_delete(fd, &key) == -1 && errno == EINVAL);
+	assert(bpf_map_delete_elem(fd, &key) == -1 && errno == EINVAL);
 
 	close(fd);
 }
@@ -445,7 +445,7 @@ static void do_work(int fn, void *data)
 			assert(bpf_map_update_elem(fd, &key, &value,
 						   BPF_EXIST) == 0);
 		} else {
-			assert(bpf_map_delete(fd, &key) == 0);
+			assert(bpf_map_delete_elem(fd, &key) == 0);
 		}
 	}
 }
-- 
cgit v1.2.3


From 5f155c2563592b1908a7df2dcbd44893fde3e419 Mon Sep 17 00:00:00 2001
From: Mickaël Salaün <mic@digikod.net>
Date: Fri, 10 Feb 2017 00:21:42 +0100
Subject: bpf: Use bpf_map_get_next_key() from the library
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace bpf_map_next_key() with bpf_map_get_next_key() calls.

Signed-off-by: Mickaël Salaün <mic@digikod.net>
Cc: Alexei Starovoitov <ast@fb.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/lib/bpf/bpf.c                        |  2 +-
 tools/lib/bpf/bpf.h                        |  2 +-
 tools/testing/selftests/bpf/bpf_sys.h      | 11 ----------
 tools/testing/selftests/bpf/test_lru_map.c |  2 +-
 tools/testing/selftests/bpf/test_maps.c    | 34 +++++++++++++++---------------
 5 files changed, 20 insertions(+), 31 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index eab8c6bfbf8f..f8a2b7fa7741 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -135,7 +135,7 @@ int bpf_map_delete_elem(int fd, const void *key)
 	return sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
 }
 
-int bpf_map_get_next_key(int fd, void *key, void *next_key)
+int bpf_map_get_next_key(int fd, const void *key, void *next_key)
 {
 	union bpf_attr attr;
 
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index f559f648db45..88f07c15423a 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -38,7 +38,7 @@ int bpf_map_update_elem(int fd, const void *key, const void *value,
 
 int bpf_map_lookup_elem(int fd, const void *key, void *value);
 int bpf_map_delete_elem(int fd, const void *key);
-int bpf_map_get_next_key(int fd, void *key, void *next_key);
+int bpf_map_get_next_key(int fd, const void *key, void *next_key);
 int bpf_obj_pin(int fd, const char *pathname);
 int bpf_obj_get(const char *pathname);
 int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type);
diff --git a/tools/testing/selftests/bpf/bpf_sys.h b/tools/testing/selftests/bpf/bpf_sys.h
index 17581a42e1d9..aeff99f0a411 100644
--- a/tools/testing/selftests/bpf/bpf_sys.h
+++ b/tools/testing/selftests/bpf/bpf_sys.h
@@ -24,17 +24,6 @@ static inline int bpf(int cmd, union bpf_attr *attr, unsigned int size)
 #endif
 }
 
-static inline int bpf_map_next_key(int fd, const void *key, void *next_key)
-{
-	union bpf_attr attr = {};
-
-	attr.map_fd = fd;
-	attr.key = bpf_ptr_to_u64(key);
-	attr.next_key = bpf_ptr_to_u64(next_key);
-
-	return bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
-}
-
 static inline int bpf_map_create(enum bpf_map_type type, uint32_t size_key,
 				 uint32_t size_value, uint32_t max_elem,
 				 uint32_t flags)
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index 859c940a6e41..360f7e006eb6 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -46,7 +46,7 @@ static int map_subset(int map0, int map1)
 	unsigned long long value0[nr_cpus], value1[nr_cpus];
 	int ret;
 
-	while (!bpf_map_next_key(map1, &next_key, &next_key)) {
+	while (!bpf_map_get_next_key(map1, &next_key, &next_key)) {
 		assert(!bpf_map_lookup_elem(map1, &next_key, value1));
 		ret = bpf_map_lookup_elem(map0, &next_key, value0);
 		if (ret) {
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 0f9f90455375..be52c808d6cf 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -89,11 +89,11 @@ static void test_hashmap(int task, void *data)
 	assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
 
 	/* Iterate over two elements. */
-	assert(bpf_map_next_key(fd, &key, &next_key) == 0 &&
+	assert(bpf_map_get_next_key(fd, &key, &next_key) == 0 &&
 	       (next_key == 1 || next_key == 2));
-	assert(bpf_map_next_key(fd, &next_key, &next_key) == 0 &&
+	assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
 	       (next_key == 1 || next_key == 2));
-	assert(bpf_map_next_key(fd, &next_key, &next_key) == -1 &&
+	assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
 	       errno == ENOENT);
 
 	/* Delete both elements. */
@@ -105,7 +105,7 @@ static void test_hashmap(int task, void *data)
 
 	key = 0;
 	/* Check that map is empty. */
-	assert(bpf_map_next_key(fd, &key, &next_key) == -1 &&
+	assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 &&
 	       errno == ENOENT);
 
 	close(fd);
@@ -175,7 +175,7 @@ static void test_hashmap_percpu(int task, void *data)
 	assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
 
 	/* Iterate over two elements. */
-	while (!bpf_map_next_key(fd, &key, &next_key)) {
+	while (!bpf_map_get_next_key(fd, &key, &next_key)) {
 		assert((expected_key_mask & next_key) == next_key);
 		expected_key_mask &= ~next_key;
 
@@ -201,7 +201,7 @@ static void test_hashmap_percpu(int task, void *data)
 
 	key = 0;
 	/* Check that map is empty. */
-	assert(bpf_map_next_key(fd, &key, &next_key) == -1 &&
+	assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 &&
 	       errno == ENOENT);
 
 	close(fd);
@@ -246,11 +246,11 @@ static void test_arraymap(int task, void *data)
 	assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
 
 	/* Iterate over two elements. */
-	assert(bpf_map_next_key(fd, &key, &next_key) == 0 &&
+	assert(bpf_map_get_next_key(fd, &key, &next_key) == 0 &&
 	       next_key == 0);
-	assert(bpf_map_next_key(fd, &next_key, &next_key) == 0 &&
+	assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
 	       next_key == 1);
-	assert(bpf_map_next_key(fd, &next_key, &next_key) == -1 &&
+	assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
 	       errno == ENOENT);
 
 	/* Delete shouldn't succeed. */
@@ -301,11 +301,11 @@ static void test_arraymap_percpu(int task, void *data)
 	assert(bpf_map_lookup_elem(fd, &key, values) == -1 && errno == ENOENT);
 
 	/* Iterate over two elements. */
-	assert(bpf_map_next_key(fd, &key, &next_key) == 0 &&
+	assert(bpf_map_get_next_key(fd, &key, &next_key) == 0 &&
 	       next_key == 0);
-	assert(bpf_map_next_key(fd, &next_key, &next_key) == 0 &&
+	assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
 	       next_key == 1);
-	assert(bpf_map_next_key(fd, &next_key, &next_key) == -1 &&
+	assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
 	       errno == ENOENT);
 
 	/* Delete shouldn't succeed. */
@@ -380,8 +380,8 @@ static void test_map_large(void)
 
 	/* Iterate through all elements. */
 	for (i = 0; i < MAP_SIZE; i++)
-		assert(bpf_map_next_key(fd, &key, &key) == 0);
-	assert(bpf_map_next_key(fd, &key, &key) == -1 && errno == ENOENT);
+		assert(bpf_map_get_next_key(fd, &key, &key) == 0);
+	assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
 
 	key.c = 0;
 	assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 0);
@@ -479,8 +479,8 @@ static void test_map_parallel(void)
 	/* Check that all elements were inserted. */
 	key = -1;
 	for (i = 0; i < MAP_SIZE; i++)
-		assert(bpf_map_next_key(fd, &key, &key) == 0);
-	assert(bpf_map_next_key(fd, &key, &key) == -1 && errno == ENOENT);
+		assert(bpf_map_get_next_key(fd, &key, &key) == 0);
+	assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
 
 	/* Another check for all elements */
 	for (i = 0; i < MAP_SIZE; i++) {
@@ -496,7 +496,7 @@ static void test_map_parallel(void)
 
 	/* Nothing should be left. */
 	key = -1;
-	assert(bpf_map_next_key(fd, &key, &key) == -1 && errno == ENOENT);
+	assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
 }
 
 static void run_all_tests(void)
-- 
cgit v1.2.3


From f4874d01beba16a1bf2512929b9d460e003d7f3d Mon Sep 17 00:00:00 2001
From: Mickaël Salaün <mic@digikod.net>
Date: Fri, 10 Feb 2017 00:21:43 +0100
Subject: bpf: Use bpf_create_map() from the library
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace bpf_map_create() with bpf_create_map() calls.

Signed-off-by: Mickaël Salaün <mic@digikod.net>
Cc: Alexei Starovoitov <ast@fb.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/bpf_sys.h       | 15 ---------------
 tools/testing/selftests/bpf/test_lpm_map.c  |  6 +++---
 tools/testing/selftests/bpf/test_lru_map.c  |  4 ++--
 tools/testing/selftests/bpf/test_maps.c     | 14 +++++++-------
 tools/testing/selftests/bpf/test_tag.c      |  2 +-
 tools/testing/selftests/bpf/test_verifier.c |  4 ++--
 6 files changed, 15 insertions(+), 30 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/bpf_sys.h b/tools/testing/selftests/bpf/bpf_sys.h
index aeff99f0a411..aa076a8a07f7 100644
--- a/tools/testing/selftests/bpf/bpf_sys.h
+++ b/tools/testing/selftests/bpf/bpf_sys.h
@@ -24,19 +24,4 @@ static inline int bpf(int cmd, union bpf_attr *attr, unsigned int size)
 #endif
 }
 
-static inline int bpf_map_create(enum bpf_map_type type, uint32_t size_key,
-				 uint32_t size_value, uint32_t max_elem,
-				 uint32_t flags)
-{
-	union bpf_attr attr = {};
-
-	attr.map_type = type;
-	attr.key_size = size_key;
-	attr.value_size = size_value;
-	attr.max_entries = max_elem;
-	attr.map_flags = flags;
-
-	return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
-}
-
 #endif /* __BPF_SYS__ */
diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c
index bd08394c26cb..3cc812cac2d7 100644
--- a/tools/testing/selftests/bpf/test_lpm_map.c
+++ b/tools/testing/selftests/bpf/test_lpm_map.c
@@ -183,7 +183,7 @@ static void test_lpm_map(int keysize)
 	key = alloca(sizeof(*key) + keysize);
 	memset(key, 0, sizeof(*key) + keysize);
 
-	map = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE,
+	map = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
 			     sizeof(*key) + keysize,
 			     keysize + 1,
 			     4096,
@@ -253,12 +253,12 @@ static void test_lpm_ipaddr(void)
 	key_ipv4 = alloca(key_size_ipv4);
 	key_ipv6 = alloca(key_size_ipv6);
 
-	map_fd_ipv4 = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE,
+	map_fd_ipv4 = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
 				     key_size_ipv4, sizeof(value),
 				     100, BPF_F_NO_PREALLOC);
 	assert(map_fd_ipv4 >= 0);
 
-	map_fd_ipv6 = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE,
+	map_fd_ipv6 = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
 				     key_size_ipv6, sizeof(value),
 				     100, BPF_F_NO_PREALLOC);
 	assert(map_fd_ipv6 >= 0);
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index 360f7e006eb6..48973ded1c96 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -31,11 +31,11 @@ static int create_map(int map_type, int map_flags, unsigned int size)
 {
 	int map_fd;
 
-	map_fd = bpf_map_create(map_type, sizeof(unsigned long long),
+	map_fd = bpf_create_map(map_type, sizeof(unsigned long long),
 				sizeof(unsigned long long), size, map_flags);
 
 	if (map_fd == -1)
-		perror("bpf_map_create");
+		perror("bpf_create_map");
 
 	return map_fd;
 }
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index be52c808d6cf..39168499f43f 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -32,7 +32,7 @@ static void test_hashmap(int task, void *data)
 	long long key, next_key, value;
 	int fd;
 
-	fd = bpf_map_create(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
+	fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
 			    2, map_flags);
 	if (fd < 0) {
 		printf("Failed to create hashmap '%s'!\n", strerror(errno));
@@ -119,7 +119,7 @@ static void test_hashmap_percpu(int task, void *data)
 	int expected_key_mask = 0;
 	int fd, i;
 
-	fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_HASH, sizeof(key),
+	fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_HASH, sizeof(key),
 			    sizeof(value[0]), 2, map_flags);
 	if (fd < 0) {
 		printf("Failed to create hashmap '%s'!\n", strerror(errno));
@@ -212,7 +212,7 @@ static void test_arraymap(int task, void *data)
 	int key, next_key, fd;
 	long long value;
 
-	fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value),
+	fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value),
 			    2, 0);
 	if (fd < 0) {
 		printf("Failed to create arraymap '%s'!\n", strerror(errno));
@@ -266,7 +266,7 @@ static void test_arraymap_percpu(int task, void *data)
 	int key, next_key, fd, i;
 	long values[nr_cpus];
 
-	fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
+	fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
 			    sizeof(values[0]), 2, 0);
 	if (fd < 0) {
 		printf("Failed to create arraymap '%s'!\n", strerror(errno));
@@ -322,7 +322,7 @@ static void test_arraymap_percpu_many_keys(void)
 	long values[nr_cpus];
 	int key, fd, i;
 
-	fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
+	fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
 			    sizeof(values[0]), nr_keys, 0);
 	if (fd < 0) {
 		printf("Failed to create per-cpu arraymap '%s'!\n",
@@ -360,7 +360,7 @@ static void test_map_large(void)
 	} key;
 	int fd, i, value;
 
-	fd = bpf_map_create(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
+	fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
 			    MAP_SIZE, map_flags);
 	if (fd < 0) {
 		printf("Failed to create large map '%s'!\n", strerror(errno));
@@ -455,7 +455,7 @@ static void test_map_parallel(void)
 	int i, fd, key = 0, value = 0;
 	int data[2];
 
-	fd = bpf_map_create(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
+	fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
 			    MAP_SIZE, map_flags);
 	if (fd < 0) {
 		printf("Failed to create map for parallel test '%s'!\n",
diff --git a/tools/testing/selftests/bpf/test_tag.c b/tools/testing/selftests/bpf/test_tag.c
index dc209721ffd5..ae4263638cd5 100644
--- a/tools/testing/selftests/bpf/test_tag.c
+++ b/tools/testing/selftests/bpf/test_tag.c
@@ -189,7 +189,7 @@ int main(void)
 	int i, fd_map;
 
 	setrlimit(RLIMIT_MEMLOCK, &rinf);
-	fd_map = bpf_map_create(BPF_MAP_TYPE_HASH, sizeof(int),
+	fd_map = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(int),
 				sizeof(int), 1, BPF_F_NO_PREALLOC);
 	assert(fd_map > 0);
 
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 247830ecf68e..63818cbb9fb1 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -4467,7 +4467,7 @@ static int create_map(uint32_t size_value, uint32_t max_elem)
 {
 	int fd;
 
-	fd = bpf_map_create(BPF_MAP_TYPE_HASH, sizeof(long long),
+	fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(long long),
 			    size_value, max_elem, BPF_F_NO_PREALLOC);
 	if (fd < 0)
 		printf("Failed to create hash map '%s'!\n", strerror(errno));
@@ -4479,7 +4479,7 @@ static int create_prog_array(void)
 {
 	int fd;
 
-	fd = bpf_map_create(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int),
+	fd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int),
 			    sizeof(int), 4, 0);
 	if (fd < 0)
 		printf("Failed to create prog array '%s'!\n", strerror(errno));
-- 
cgit v1.2.3


From 702498a1426bc95b6f49f9c5fba616110cbd3947 Mon Sep 17 00:00:00 2001
From: Mickaël Salaün <mic@digikod.net>
Date: Fri, 10 Feb 2017 00:21:44 +0100
Subject: bpf: Remove bpf_sys.h from selftests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add require dependency headers.

Signed-off-by: Mickaël Salaün <mic@digikod.net>
Cc: Alexei Starovoitov <ast@fb.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/lib/bpf/bpf.c                         |  6 ++++++
 tools/testing/selftests/bpf/bpf_sys.h       | 27 ---------------------------
 tools/testing/selftests/bpf/test_lpm_map.c  |  1 -
 tools/testing/selftests/bpf/test_lru_map.c  |  1 -
 tools/testing/selftests/bpf/test_maps.c     |  1 -
 tools/testing/selftests/bpf/test_tag.c      |  3 +--
 tools/testing/selftests/bpf/test_verifier.c |  4 ++--
 7 files changed, 9 insertions(+), 34 deletions(-)
 delete mode 100644 tools/testing/selftests/bpf/bpf_sys.h

(limited to 'tools/testing')

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index f8a2b7fa7741..50e04cc5dddd 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -50,7 +50,13 @@ static __u64 ptr_to_u64(const void *ptr)
 static int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
 		   unsigned int size)
 {
+#ifdef __NR_bpf
 	return syscall(__NR_bpf, cmd, attr, size);
+#else
+	fprintf(stderr, "No bpf syscall, kernel headers too old?\n");
+	errno = ENOSYS;
+	return -1;
+#endif
 }
 
 int bpf_create_map(enum bpf_map_type map_type, int key_size,
diff --git a/tools/testing/selftests/bpf/bpf_sys.h b/tools/testing/selftests/bpf/bpf_sys.h
deleted file mode 100644
index aa076a8a07f7..000000000000
--- a/tools/testing/selftests/bpf/bpf_sys.h
+++ /dev/null
@@ -1,27 +0,0 @@
-#ifndef __BPF_SYS__
-#define __BPF_SYS__
-
-#include <stdint.h>
-#include <stdlib.h>
-
-#include <sys/syscall.h>
-
-#include <linux/bpf.h>
-
-static inline __u64 bpf_ptr_to_u64(const void *ptr)
-{
-	return (__u64)(unsigned long) ptr;
-}
-
-static inline int bpf(int cmd, union bpf_attr *attr, unsigned int size)
-{
-#ifdef __NR_bpf
-	return syscall(__NR_bpf, cmd, attr, size);
-#else
-	fprintf(stderr, "No bpf syscall, kernel headers too old?\n");
-	errno = ENOSYS;
-	return -1;
-#endif
-}
-
-#endif /* __BPF_SYS__ */
diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c
index 3cc812cac2d7..e97565243d59 100644
--- a/tools/testing/selftests/bpf/test_lpm_map.c
+++ b/tools/testing/selftests/bpf/test_lpm_map.c
@@ -23,7 +23,6 @@
 #include <sys/resource.h>
 
 #include <bpf/bpf.h>
-#include "bpf_sys.h"
 #include "bpf_util.h"
 
 struct tlpm_node {
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index 48973ded1c96..00b0aff56e2e 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -19,7 +19,6 @@
 #include <sys/resource.h>
 
 #include <bpf/bpf.h>
-#include "bpf_sys.h"
 #include "bpf_util.h"
 
 #define LOCAL_FREE_TARGET	(128)
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 39168499f43f..cada17ac00b8 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -22,7 +22,6 @@
 #include <linux/bpf.h>
 
 #include <bpf/bpf.h>
-#include "bpf_sys.h"
 #include "bpf_util.h"
 
 static int map_flags;
diff --git a/tools/testing/selftests/bpf/test_tag.c b/tools/testing/selftests/bpf/test_tag.c
index ae4263638cd5..de409fc50c35 100644
--- a/tools/testing/selftests/bpf/test_tag.c
+++ b/tools/testing/selftests/bpf/test_tag.c
@@ -1,3 +1,4 @@
+#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <ctype.h>
@@ -20,8 +21,6 @@
 
 #include "../../../include/linux/filter.h"
 
-#include "bpf_sys.h"
-
 static struct bpf_insn prog[BPF_MAXINSNS];
 
 static void bpf_gen_imm_prog(unsigned int insns, int fd_map)
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 63818cbb9fb1..e1f5b9eea1e8 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -8,7 +8,9 @@
  * License as published by the Free Software Foundation.
  */
 
+#include <stdint.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include <unistd.h>
 #include <errno.h>
 #include <string.h>
@@ -28,8 +30,6 @@
 
 #include "../../../include/linux/filter.h"
 
-#include "bpf_sys.h"
-
 #ifndef ARRAY_SIZE
 # define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
 #endif
-- 
cgit v1.2.3


From bc6a3d9977b6ea093cd7c567ef83657023e77f26 Mon Sep 17 00:00:00 2001
From: Mickaël Salaün <mic@digikod.net>
Date: Fri, 10 Feb 2017 00:21:45 +0100
Subject: bpf: Add test_tag to .gitignore
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Mickaël Salaün <mic@digikod.net>
Cc: Alexei Starovoitov <ast@fb.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/.gitignore | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index d3b1c9bca407..541d9d7fad5a 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -2,3 +2,4 @@ test_verifier
 test_maps
 test_lru_map
 test_lpm_map
+test_tag
-- 
cgit v1.2.3


From 43a30c2a3171e0f92c28dfd31ca3eb7d9cc72b26 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Mon, 19 Dec 2016 11:09:34 -0500
Subject: radix tree test suite: Depend on tools/include/asm files

Changing tools/include/asm/bug.h showed a missing dependency in the
Makefile.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Reviewed-by: Rehas Sachdeva <aquannie@gmail.com>
---
 tools/testing/radix-tree/Makefile | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/testing')

diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index 3635e4d3eca7..7f7600424b14 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -23,6 +23,7 @@ find_next_bit.o: ../../lib/find_bit.c
 
 $(OFILES): *.h */*.h \
 	../../include/linux/*.h \
+	../../include/asm/*.h \
 	../../../include/linux/radix-tree.h
 
 radix-tree.c: ../../../lib/radix-tree.c
-- 
cgit v1.2.3


From a3c7890790e742074bc9e5640b0fcf9c61a771a2 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Fri, 16 Dec 2016 15:12:41 -0500
Subject: radix tree test suite: Remove mempool

The radix tree hasn't used a mempool since the beginning of git history.
Remove the userspace mempool implementation.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Reviewed-by: Rehas Sachdeva <aquannie@gmail.com>
---
 tools/testing/radix-tree/linux.c         | 22 ----------------------
 tools/testing/radix-tree/linux/mempool.h | 16 ----------------
 2 files changed, 38 deletions(-)
 delete mode 100644 tools/testing/radix-tree/linux/mempool.h

(limited to 'tools/testing')

diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c
index d31ea7c9abec..c27c11261c37 100644
--- a/tools/testing/radix-tree/linux.c
+++ b/tools/testing/radix-tree/linux.c
@@ -5,7 +5,6 @@
 #include <unistd.h>
 #include <assert.h>
 
-#include <linux/mempool.h>
 #include <linux/poison.h>
 #include <linux/slab.h>
 #include <linux/radix-tree.h>
@@ -22,27 +21,6 @@ struct kmem_cache {
 	void (*ctor)(void *);
 };
 
-void *mempool_alloc(mempool_t *pool, int gfp_mask)
-{
-	return pool->alloc(gfp_mask, pool->data);
-}
-
-void mempool_free(void *element, mempool_t *pool)
-{
-	pool->free(element, pool->data);
-}
-
-mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
-			mempool_free_t *free_fn, void *pool_data)
-{
-	mempool_t *ret = malloc(sizeof(*ret));
-
-	ret->alloc = alloc_fn;
-	ret->free = free_fn;
-	ret->data = pool_data;
-	return ret;
-}
-
 void *kmem_cache_alloc(struct kmem_cache *cachep, int flags)
 {
 	struct radix_tree_node *node;
diff --git a/tools/testing/radix-tree/linux/mempool.h b/tools/testing/radix-tree/linux/mempool.h
deleted file mode 100644
index 6a2dc55b41d6..000000000000
--- a/tools/testing/radix-tree/linux/mempool.h
+++ /dev/null
@@ -1,16 +0,0 @@
-
-#include <linux/slab.h>
-
-typedef void *(mempool_alloc_t)(int gfp_mask, void *pool_data);
-typedef void (mempool_free_t)(void *element, void *pool_data);
-
-typedef struct {
-	mempool_alloc_t *alloc;
-	mempool_free_t *free;
-	void *data;
-} mempool_t;
-
-void *mempool_alloc(mempool_t *pool, int gfp_mask);
-void mempool_free(void *element, mempool_t *pool);
-mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
-			mempool_free_t *free_fn, void *pool_data);
-- 
cgit v1.2.3


From 12ea65390bd5a46f8a70f068eb0d48922576a781 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Fri, 16 Dec 2016 14:53:45 -0500
Subject: radix tree test suite: Remove types.h

Move the pieces we still need to tools/include and update a few implicit
includes.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 tools/include/linux/compiler.h         |  4 ++++
 tools/include/linux/spinlock.h         |  5 +++++
 tools/testing/radix-tree/linux.c       |  1 +
 tools/testing/radix-tree/linux/gfp.h   |  2 ++
 tools/testing/radix-tree/linux/types.h | 23 -----------------------
 5 files changed, 12 insertions(+), 23 deletions(-)
 create mode 100644 tools/include/linux/spinlock.h
 delete mode 100644 tools/testing/radix-tree/linux/types.h

(limited to 'tools/testing')

diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h
index e33fc1df3935..9d294161c494 100644
--- a/tools/include/linux/compiler.h
+++ b/tools/include/linux/compiler.h
@@ -21,6 +21,8 @@
 #endif
 
 #define __user
+#define __rcu
+#define __read_mostly
 
 #ifndef __attribute_const__
 # define __attribute_const__
@@ -50,6 +52,8 @@
 # define unlikely(x)		__builtin_expect(!!(x), 0)
 #endif
 
+#define uninitialized_var(x) x = *(&(x))
+
 #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
 
 #include <linux/types.h>
diff --git a/tools/include/linux/spinlock.h b/tools/include/linux/spinlock.h
new file mode 100644
index 000000000000..58397dcb19d6
--- /dev/null
+++ b/tools/include/linux/spinlock.h
@@ -0,0 +1,5 @@
+#define spinlock_t		pthread_mutex_t
+#define DEFINE_SPINLOCK(x)	pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER;
+
+#define spin_lock_irqsave(x, f)		(void)f, pthread_mutex_lock(x)
+#define spin_unlock_irqrestore(x, f)	(void)f, pthread_mutex_unlock(x)
diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c
index c27c11261c37..93f06614e91b 100644
--- a/tools/testing/radix-tree/linux.c
+++ b/tools/testing/radix-tree/linux.c
@@ -5,6 +5,7 @@
 #include <unistd.h>
 #include <assert.h>
 
+#include <linux/gfp.h>
 #include <linux/poison.h>
 #include <linux/slab.h>
 #include <linux/radix-tree.h>
diff --git a/tools/testing/radix-tree/linux/gfp.h b/tools/testing/radix-tree/linux/gfp.h
index 5b09b2ce6c33..701209816a6b 100644
--- a/tools/testing/radix-tree/linux/gfp.h
+++ b/tools/testing/radix-tree/linux/gfp.h
@@ -1,6 +1,8 @@
 #ifndef _GFP_H
 #define _GFP_H
 
+#include <linux/types.h>
+
 #define __GFP_BITS_SHIFT 26
 #define __GFP_BITS_MASK ((gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
 
diff --git a/tools/testing/radix-tree/linux/types.h b/tools/testing/radix-tree/linux/types.h
deleted file mode 100644
index 8491d89873bb..000000000000
--- a/tools/testing/radix-tree/linux/types.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef _TYPES_H
-#define _TYPES_H
-
-#include "../../include/linux/types.h"
-
-#define __rcu
-#define __read_mostly
-
-static inline void INIT_LIST_HEAD(struct list_head *list)
-{
-	list->next = list;
-	list->prev = list;
-}
-
-typedef struct {
-	unsigned int x;
-} spinlock_t;
-
-#define uninitialized_var(x) x = x
-
-#include <linux/gfp.h>
-
-#endif
-- 
cgit v1.2.3


From 7a4f11b88981f47d79eb64267e8a2989a18ce831 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Wed, 28 Dec 2016 19:40:49 -0500
Subject: radix tree test suite: Remove export.h

The tools/include export.h contains everything we need.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 tools/testing/radix-tree/linux/export.h | 2 --
 1 file changed, 2 deletions(-)
 delete mode 100644 tools/testing/radix-tree/linux/export.h

(limited to 'tools/testing')

diff --git a/tools/testing/radix-tree/linux/export.h b/tools/testing/radix-tree/linux/export.h
deleted file mode 100644
index b6afd131998d..000000000000
--- a/tools/testing/radix-tree/linux/export.h
+++ /dev/null
@@ -1,2 +0,0 @@
-
-#define EXPORT_SYMBOL(sym)
-- 
cgit v1.2.3


From ab3a1ffd11d90583c71bd606bf0fd5bfef30bce9 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Fri, 16 Dec 2016 15:11:05 -0500
Subject: radix tree test suite: Reduce kernel.h

Many of the definitions in the radix-tree kernel.h are redundant with
others in tools/include, or are no longer used, such as panic().
Move the definition of __init to init.h and in_interrupt() to preempt.h

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 tools/testing/radix-tree/linux/init.h    |  2 +-
 tools/testing/radix-tree/linux/kernel.h  | 39 +-------------------------------
 tools/testing/radix-tree/linux/preempt.h | 10 ++++++++
 3 files changed, 12 insertions(+), 39 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/radix-tree/linux/init.h b/tools/testing/radix-tree/linux/init.h
index 360cabb3c4e7..1bb0afc21309 100644
--- a/tools/testing/radix-tree/linux/init.h
+++ b/tools/testing/radix-tree/linux/init.h
@@ -1 +1 @@
-/* An empty file stub that allows radix-tree.c to compile. */
+#define __init
diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h
index 9681463c91e2..dd1d9aefb14f 100644
--- a/tools/testing/radix-tree/linux/kernel.h
+++ b/tools/testing/radix-tree/linux/kernel.h
@@ -1,10 +1,9 @@
 #ifndef _KERNEL_H
 #define _KERNEL_H
 
-#include <assert.h>
+#include "../../include/linux/kernel.h"
 #include <string.h>
 #include <stdio.h>
-#include <stddef.h>
 #include <limits.h>
 
 #include <linux/compiler.h>
@@ -19,47 +18,11 @@
 #define RADIX_TREE_MAP_SHIFT	3
 #endif
 
-#ifndef NULL
-#define NULL	0
-#endif
-
-#define BUG_ON(expr)	assert(!(expr))
-#define __init
-#define __must_check
-#define panic(expr)
 #define printk printf
-#define __force
-#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
 #define pr_debug printk
 
-#define smp_rmb()	barrier()
-#define smp_wmb()	barrier()
-#define cpu_relax()	barrier()
-
 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
 
-#define container_of(ptr, type, member) ({                      \
-	const typeof( ((type *)0)->member ) *__mptr = (ptr);    \
-	(type *)( (char *)__mptr - offsetof(type, member) );})
-#define min(a, b) ((a) < (b) ? (a) : (b))
-
-#define cond_resched()	sched_yield()
-
-static inline int in_interrupt(void)
-{
-	return 0;
-}
-
-/*
- * This looks more complex than it should be. But we need to
- * get the type for the ~ right in round_down (it needs to be
- * as wide as the result!), and we want to evaluate the macro
- * arguments just once each.
- */
-#define __round_mask(x, y) ((__typeof__(x))((y)-1))
-#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
-#define round_down(x, y) ((x) & ~__round_mask(x, y))
-
 #define xchg(ptr, x)	uatomic_xchg(ptr, x)
 
 #endif /* _KERNEL_H */
diff --git a/tools/testing/radix-tree/linux/preempt.h b/tools/testing/radix-tree/linux/preempt.h
index 65c04c226965..35c5ac81529f 100644
--- a/tools/testing/radix-tree/linux/preempt.h
+++ b/tools/testing/radix-tree/linux/preempt.h
@@ -1,4 +1,14 @@
+#ifndef __LINUX_PREEMPT_H
+#define __LINUX_PREEMPT_H
+
 extern int preempt_count;
 
 #define preempt_disable()	uatomic_inc(&preempt_count)
 #define preempt_enable()	uatomic_dec(&preempt_count)
+
+static inline int in_interrupt(void)
+{
+	return 0;
+}
+
+#endif /* __LINUX_PREEMPT_H */
-- 
cgit v1.2.3


From 991af734c46d726f1c62cabd1681563beb533e14 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Wed, 28 Dec 2016 22:53:46 -0500
Subject: radix tree test suite: Use vpath to find lib files

Instead of specifying how to build find_bit.o from lib/find_bit.o,
use vpath to tell make where to find find_bit.c.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Reviewed-by: Rehas Sachdeva <aquannie@gmail.com>
---
 tools/testing/radix-tree/Makefile | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index 7f7600424b14..5274e88cd293 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -2,7 +2,7 @@
 CFLAGS += -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE
 LDFLAGS += -lpthread -lurcu
 TARGETS = main
-OFILES = main.o radix-tree.o linux.o test.o tag_check.o find_next_bit.o \
+OFILES = main.o radix-tree.o linux.o test.o tag_check.o find_bit.o \
 	 regression1.o regression2.o regression3.o multiorder.o \
 	 iteration_check.o benchmark.o
 
@@ -18,8 +18,7 @@ main:	$(OFILES)
 clean:
 	$(RM) -f $(TARGETS) *.o radix-tree.c
 
-find_next_bit.o: ../../lib/find_bit.c
-	$(CC) $(CFLAGS) -c -o $@ $<
+vpath %.c ../../lib
 
 $(OFILES): *.h */*.h \
 	../../include/linux/*.h \
-- 
cgit v1.2.3


From bfa11193c46d5ea8ef4d9a4f1c10221cdd985ab1 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Thu, 29 Dec 2016 14:49:48 -0500
Subject: radix tree test suite: Remove obsolete CONFIG

radix-tree.c doesn't use these CONFIG options any more.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Reviewed-by: Rehas Sachdeva <aquannie@gmail.com>
---
 tools/testing/radix-tree/generated/autoconf.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/radix-tree/generated/autoconf.h b/tools/testing/radix-tree/generated/autoconf.h
index ad18cf5a2a3a..cf88dc5b8832 100644
--- a/tools/testing/radix-tree/generated/autoconf.h
+++ b/tools/testing/radix-tree/generated/autoconf.h
@@ -1,3 +1 @@
 #define CONFIG_RADIX_TREE_MULTIORDER 1
-#define CONFIG_SHMEM 1
-#define CONFIG_SWAP 1
-- 
cgit v1.2.3


From 0a835c4f090af2c76fc2932c539c3b32fd21fbbb Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Tue, 20 Dec 2016 10:27:56 -0500
Subject: Reimplement IDR and IDA using the radix tree

The IDR is very similar to the radix tree.  It has some functionality that
the radix tree did not have (alloc next free, cyclic allocation, a
callback-based for_each, destroy tree), which is readily implementable on
top of the radix tree.  A few small changes were needed in order to use a
tag to represent nodes with free space below them.  More extensive
changes were needed to support storing NULL as a valid entry in an IDR.
Plain radix trees still interpret NULL as a not-present entry.

The IDA is reimplemented as a client of the newly enhanced radix tree.  As
in the current implementation, it uses a bitmap at the last level of the
tree.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Tested-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/idr.h                     |  145 ++--
 include/linux/radix-tree.h              |   49 +-
 init/main.c                             |    3 +-
 lib/idr.c                               | 1178 ++++++-------------------------
 lib/radix-tree.c                        |  375 +++++++---
 tools/testing/radix-tree/.gitignore     |    1 +
 tools/testing/radix-tree/Makefile       |   10 +-
 tools/testing/radix-tree/idr-test.c     |  342 +++++++++
 tools/testing/radix-tree/linux/gfp.h    |    8 +-
 tools/testing/radix-tree/linux/idr.h    |    1 +
 tools/testing/radix-tree/linux/kernel.h |    1 +
 tools/testing/radix-tree/main.c         |    6 +
 tools/testing/radix-tree/test.h         |    2 +
 13 files changed, 995 insertions(+), 1126 deletions(-)
 create mode 100644 tools/testing/radix-tree/idr-test.c
 create mode 100644 tools/testing/radix-tree/linux/idr.h

(limited to 'tools/testing')

diff --git a/include/linux/idr.h b/include/linux/idr.h
index 3c01b89aed67..f58c0a3addc3 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -12,47 +12,28 @@
 #ifndef __IDR_H__
 #define __IDR_H__
 
-#include <linux/types.h>
-#include <linux/bitops.h>
-#include <linux/init.h>
-#include <linux/rcupdate.h>
+#include <linux/radix-tree.h>
+#include <linux/gfp.h>
+
+struct idr {
+	struct radix_tree_root	idr_rt;
+	unsigned int		idr_next;
+};
 
 /*
- * Using 6 bits at each layer allows us to allocate 7 layers out of each page.
- * 8 bits only gave us 3 layers out of every pair of pages, which is less
- * efficient except for trees with a largest element between 192-255 inclusive.
+ * The IDR API does not expose the tagging functionality of the radix tree
+ * to users.  Use tag 0 to track whether a node has free space below it.
  */
-#define IDR_BITS 6
-#define IDR_SIZE (1 << IDR_BITS)
-#define IDR_MASK ((1 << IDR_BITS)-1)
-
-struct idr_layer {
-	int			prefix;	/* the ID prefix of this idr_layer */
-	int			layer;	/* distance from leaf */
-	struct idr_layer __rcu	*ary[1<<IDR_BITS];
-	int			count;	/* When zero, we can release it */
-	union {
-		/* A zero bit means "space here" */
-		DECLARE_BITMAP(bitmap, IDR_SIZE);
-		struct rcu_head		rcu_head;
-	};
-};
+#define IDR_FREE	0
 
-struct idr {
-	struct idr_layer __rcu	*hint;	/* the last layer allocated from */
-	struct idr_layer __rcu	*top;
-	int			layers;	/* only valid w/o concurrent changes */
-	int			cur;	/* current pos for cyclic allocation */
-	spinlock_t		lock;
-	int			id_free_cnt;
-	struct idr_layer	*id_free;
-};
+/* Set the IDR flag and the IDR_FREE tag */
+#define IDR_RT_MARKER		((__force gfp_t)(3 << __GFP_BITS_SHIFT))
 
-#define IDR_INIT(name)							\
+#define IDR_INIT							\
 {									\
-	.lock			= __SPIN_LOCK_UNLOCKED(name.lock),	\
+	.idr_rt = RADIX_TREE_INIT(IDR_RT_MARKER)			\
 }
-#define DEFINE_IDR(name)	struct idr name = IDR_INIT(name)
+#define DEFINE_IDR(name)	struct idr name = IDR_INIT
 
 /**
  * idr_get_cursor - Return the current position of the cyclic allocator
@@ -62,9 +43,9 @@ struct idr {
  * idr_alloc_cyclic() if it is free (otherwise the search will start from
  * this position).
  */
-static inline unsigned int idr_get_cursor(struct idr *idr)
+static inline unsigned int idr_get_cursor(const struct idr *idr)
 {
-	return READ_ONCE(idr->cur);
+	return READ_ONCE(idr->idr_next);
 }
 
 /**
@@ -77,7 +58,7 @@ static inline unsigned int idr_get_cursor(struct idr *idr)
  */
 static inline void idr_set_cursor(struct idr *idr, unsigned int val)
 {
-	WRITE_ONCE(idr->cur, val);
+	WRITE_ONCE(idr->idr_next, val);
 }
 
 /**
@@ -97,22 +78,31 @@ static inline void idr_set_cursor(struct idr *idr, unsigned int val)
  * period).
  */
 
-/*
- * This is what we export.
- */
-
-void *idr_find_slowpath(struct idr *idp, int id);
 void idr_preload(gfp_t gfp_mask);
-int idr_alloc(struct idr *idp, void *ptr, int start, int end, gfp_t gfp_mask);
-int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask);
-int idr_for_each(struct idr *idp,
+int idr_alloc(struct idr *, void *entry, int start, int end, gfp_t);
+int idr_alloc_cyclic(struct idr *, void *entry, int start, int end, gfp_t);
+int idr_for_each(const struct idr *,
 		 int (*fn)(int id, void *p, void *data), void *data);
-void *idr_get_next(struct idr *idp, int *nextid);
-void *idr_replace(struct idr *idp, void *ptr, int id);
-void idr_remove(struct idr *idp, int id);
-void idr_destroy(struct idr *idp);
-void idr_init(struct idr *idp);
-bool idr_is_empty(struct idr *idp);
+void *idr_get_next(struct idr *, int *nextid);
+void *idr_replace(struct idr *, void *, int id);
+void idr_destroy(struct idr *);
+
+static inline void idr_remove(struct idr *idr, int id)
+{
+	radix_tree_delete(&idr->idr_rt, id);
+}
+
+static inline void idr_init(struct idr *idr)
+{
+	INIT_RADIX_TREE(&idr->idr_rt, IDR_RT_MARKER);
+	idr->idr_next = 0;
+}
+
+static inline bool idr_is_empty(const struct idr *idr)
+{
+	return radix_tree_empty(&idr->idr_rt) &&
+		radix_tree_tagged(&idr->idr_rt, IDR_FREE);
+}
 
 /**
  * idr_preload_end - end preload section started with idr_preload()
@@ -137,19 +127,14 @@ static inline void idr_preload_end(void)
  * This function can be called under rcu_read_lock(), given that the leaf
  * pointers lifetimes are correctly managed.
  */
-static inline void *idr_find(struct idr *idr, int id)
+static inline void *idr_find(const struct idr *idr, int id)
 {
-	struct idr_layer *hint = rcu_dereference_raw(idr->hint);
-
-	if (hint && (id & ~IDR_MASK) == hint->prefix)
-		return rcu_dereference_raw(hint->ary[id & IDR_MASK]);
-
-	return idr_find_slowpath(idr, id);
+	return radix_tree_lookup(&idr->idr_rt, id);
 }
 
 /**
  * idr_for_each_entry - iterate over an idr's elements of a given type
- * @idp:     idr handle
+ * @idr:     idr handle
  * @entry:   the type * to use as cursor
  * @id:      id entry's key
  *
@@ -157,57 +142,60 @@ static inline void *idr_find(struct idr *idr, int id)
  * after normal terminatinon @entry is left with the value NULL.  This
  * is convenient for a "not found" value.
  */
-#define idr_for_each_entry(idp, entry, id)			\
-	for (id = 0; ((entry) = idr_get_next(idp, &(id))) != NULL; ++id)
+#define idr_for_each_entry(idr, entry, id)			\
+	for (id = 0; ((entry) = idr_get_next(idr, &(id))) != NULL; ++id)
 
 /**
- * idr_for_each_entry - continue iteration over an idr's elements of a given type
- * @idp:     idr handle
+ * idr_for_each_entry_continue - continue iteration over an idr's elements of a given type
+ * @idr:     idr handle
  * @entry:   the type * to use as cursor
  * @id:      id entry's key
  *
  * Continue to iterate over list of given type, continuing after
  * the current position.
  */
-#define idr_for_each_entry_continue(idp, entry, id)			\
-	for ((entry) = idr_get_next((idp), &(id));			\
+#define idr_for_each_entry_continue(idr, entry, id)			\
+	for ((entry) = idr_get_next((idr), &(id));			\
 	     entry;							\
-	     ++id, (entry) = idr_get_next((idp), &(id)))
+	     ++id, (entry) = idr_get_next((idr), &(id)))
 
 /*
  * IDA - IDR based id allocator, use when translation from id to
  * pointer isn't necessary.
- *
- * IDA_BITMAP_LONGS is calculated to be one less to accommodate
- * ida_bitmap->nr_busy so that the whole struct fits in 128 bytes.
  */
 #define IDA_CHUNK_SIZE		128	/* 128 bytes per chunk */
-#define IDA_BITMAP_LONGS	(IDA_CHUNK_SIZE / sizeof(long) - 1)
+#define IDA_BITMAP_LONGS	(IDA_CHUNK_SIZE / sizeof(long))
 #define IDA_BITMAP_BITS 	(IDA_BITMAP_LONGS * sizeof(long) * 8)
 
 struct ida_bitmap {
-	long			nr_busy;
 	unsigned long		bitmap[IDA_BITMAP_LONGS];
 };
 
 struct ida {
-	struct idr		idr;
+	struct radix_tree_root	ida_rt;
 	struct ida_bitmap	*free_bitmap;
 };
 
-#define IDA_INIT(name)		{ .idr = IDR_INIT((name).idr), .free_bitmap = NULL, }
-#define DEFINE_IDA(name)	struct ida name = IDA_INIT(name)
+#define IDA_INIT	{						\
+	.ida_rt = RADIX_TREE_INIT(IDR_RT_MARKER | GFP_NOWAIT),		\
+}
+#define DEFINE_IDA(name)	struct ida name = IDA_INIT
 
 int ida_pre_get(struct ida *ida, gfp_t gfp_mask);
 int ida_get_new_above(struct ida *ida, int starting_id, int *p_id);
 void ida_remove(struct ida *ida, int id);
 void ida_destroy(struct ida *ida);
-void ida_init(struct ida *ida);
 
 int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end,
 		   gfp_t gfp_mask);
 void ida_simple_remove(struct ida *ida, unsigned int id);
 
+static inline void ida_init(struct ida *ida)
+{
+	INIT_RADIX_TREE(&ida->ida_rt, IDR_RT_MARKER | GFP_NOWAIT);
+	ida->free_bitmap = NULL;
+}
+
 /**
  * ida_get_new - allocate new ID
  * @ida:	idr handle
@@ -220,11 +208,8 @@ static inline int ida_get_new(struct ida *ida, int *p_id)
 	return ida_get_new_above(ida, 0, p_id);
 }
 
-static inline bool ida_is_empty(struct ida *ida)
+static inline bool ida_is_empty(const struct ida *ida)
 {
-	return idr_is_empty(&ida->idr);
+	return radix_tree_empty(&ida->ida_rt);
 }
-
-void __init idr_init_cache(void);
-
 #endif /* __IDR_H__ */
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 05f715cb8062..2ba0c1f46c84 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -105,7 +105,10 @@ struct radix_tree_node {
 	unsigned long	tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
 };
 
-/* root tags are stored in gfp_mask, shifted by __GFP_BITS_SHIFT */
+/* The top bits of gfp_mask are used to store the root tags and the IDR flag */
+#define ROOT_IS_IDR	((__force gfp_t)(1 << __GFP_BITS_SHIFT))
+#define ROOT_TAG_SHIFT	(__GFP_BITS_SHIFT + 1)
+
 struct radix_tree_root {
 	gfp_t			gfp_mask;
 	struct radix_tree_node	__rcu *rnode;
@@ -358,10 +361,14 @@ int radix_tree_split(struct radix_tree_root *, unsigned long index,
 			unsigned new_order);
 int radix_tree_join(struct radix_tree_root *, unsigned long index,
 			unsigned new_order, void *);
+void **idr_get_free(struct radix_tree_root *, struct radix_tree_iter *,
+			gfp_t, int end);
 
-#define RADIX_TREE_ITER_TAG_MASK	0x00FF	/* tag index in lower byte */
-#define RADIX_TREE_ITER_TAGGED		0x0100	/* lookup tagged slots */
-#define RADIX_TREE_ITER_CONTIG		0x0200	/* stop at first hole */
+enum {
+	RADIX_TREE_ITER_TAG_MASK = 0x0f,	/* tag index in lower nybble */
+	RADIX_TREE_ITER_TAGGED   = 0x10,	/* lookup tagged slots */
+	RADIX_TREE_ITER_CONTIG   = 0x20,	/* stop at first hole */
+};
 
 /**
  * radix_tree_iter_init - initialize radix tree iterator
@@ -402,6 +409,40 @@ radix_tree_iter_init(struct radix_tree_iter *iter, unsigned long start)
 void **radix_tree_next_chunk(const struct radix_tree_root *,
 			     struct radix_tree_iter *iter, unsigned flags);
 
+/**
+ * radix_tree_iter_lookup - look up an index in the radix tree
+ * @root: radix tree root
+ * @iter: iterator state
+ * @index: key to look up
+ *
+ * If @index is present in the radix tree, this function returns the slot
+ * containing it and updates @iter to describe the entry.  If @index is not
+ * present, it returns NULL.
+ */
+static inline void **radix_tree_iter_lookup(const struct radix_tree_root *root,
+			struct radix_tree_iter *iter, unsigned long index)
+{
+	radix_tree_iter_init(iter, index);
+	return radix_tree_next_chunk(root, iter, RADIX_TREE_ITER_CONTIG);
+}
+
+/**
+ * radix_tree_iter_find - find a present entry
+ * @root: radix tree root
+ * @iter: iterator state
+ * @index: start location
+ *
+ * This function returns the slot containing the entry with the lowest index
+ * which is at least @index.  If @index is larger than any present entry, this
+ * function returns NULL.  The @iter is updated to describe the entry found.
+ */
+static inline void **radix_tree_iter_find(const struct radix_tree_root *root,
+			struct radix_tree_iter *iter, unsigned long index)
+{
+	radix_tree_iter_init(iter, index);
+	return radix_tree_next_chunk(root, iter, 0);
+}
+
 /**
  * radix_tree_iter_retry - retry this chunk of the iteration
  * @iter:	iterator state
diff --git a/init/main.c b/init/main.c
index b0c9d6facef9..a65e3aad31bc 100644
--- a/init/main.c
+++ b/init/main.c
@@ -553,7 +553,7 @@ asmlinkage __visible void __init start_kernel(void)
 	if (WARN(!irqs_disabled(),
 		 "Interrupts were enabled *very* early, fixing it\n"))
 		local_irq_disable();
-	idr_init_cache();
+	radix_tree_init();
 
 	/*
 	 * Allow workqueue creation and work item queueing/cancelling
@@ -568,7 +568,6 @@ asmlinkage __visible void __init start_kernel(void)
 	trace_init();
 
 	context_tracking_init();
-	radix_tree_init();
 	/* init some links before init_ISA_irqs() */
 	early_irq_init();
 	init_IRQ();
diff --git a/lib/idr.c b/lib/idr.c
index 52d2979a05e8..b87056e2cc4c 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -1,1068 +1,369 @@
-/*
- * 2002-10-18  written by Jim Houston jim.houston@ccur.com
- *	Copyright (C) 2002 by Concurrent Computer Corporation
- *	Distributed under the GNU GPL license version 2.
- *
- * Modified by George Anzinger to reuse immediately and to use
- * find bit instructions.  Also removed _irq on spinlocks.
- *
- * Modified by Nadia Derbey to make it RCU safe.
- *
- * Small id to pointer translation service.
- *
- * It uses a radix tree like structure as a sparse array indexed
- * by the id to obtain the pointer.  The bitmap makes allocating
- * a new id quick.
- *
- * You call it to allocate an id (an int) an associate with that id a
- * pointer or what ever, we treat it as a (void *).  You can pass this
- * id to a user for him to pass back at a later time.  You then pass
- * that id to this code and it returns your pointer.
- */
-
-#ifndef TEST                        // to test in user space...
-#include <linux/slab.h>
-#include <linux/init.h>
+#include <linux/bitmap.h>
 #include <linux/export.h>
-#endif
-#include <linux/err.h>
-#include <linux/string.h>
 #include <linux/idr.h>
+#include <linux/slab.h>
 #include <linux/spinlock.h>
-#include <linux/percpu.h>
 
-#define MAX_IDR_SHIFT		(sizeof(int) * 8 - 1)
-#define MAX_IDR_BIT		(1U << MAX_IDR_SHIFT)
-
-/* Leave the possibility of an incomplete final layer */
-#define MAX_IDR_LEVEL ((MAX_IDR_SHIFT + IDR_BITS - 1) / IDR_BITS)
-
-/* Number of id_layer structs to leave in free list */
-#define MAX_IDR_FREE (MAX_IDR_LEVEL * 2)
-
-static struct kmem_cache *idr_layer_cache;
-static DEFINE_PER_CPU(struct idr_layer *, idr_preload_head);
-static DEFINE_PER_CPU(int, idr_preload_cnt);
 static DEFINE_SPINLOCK(simple_ida_lock);
 
-/* the maximum ID which can be allocated given idr->layers */
-static int idr_max(int layers)
-{
-	int bits = min_t(int, layers * IDR_BITS, MAX_IDR_SHIFT);
-
-	return (1 << bits) - 1;
-}
-
-/*
- * Prefix mask for an idr_layer at @layer.  For layer 0, the prefix mask is
- * all bits except for the lower IDR_BITS.  For layer 1, 2 * IDR_BITS, and
- * so on.
- */
-static int idr_layer_prefix_mask(int layer)
-{
-	return ~idr_max(layer + 1);
-}
-
-static struct idr_layer *get_from_free_list(struct idr *idp)
-{
-	struct idr_layer *p;
-	unsigned long flags;
-
-	spin_lock_irqsave(&idp->lock, flags);
-	if ((p = idp->id_free)) {
-		idp->id_free = p->ary[0];
-		idp->id_free_cnt--;
-		p->ary[0] = NULL;
-	}
-	spin_unlock_irqrestore(&idp->lock, flags);
-	return(p);
-}
-
-/**
- * idr_layer_alloc - allocate a new idr_layer
- * @gfp_mask: allocation mask
- * @layer_idr: optional idr to allocate from
- *
- * If @layer_idr is %NULL, directly allocate one using @gfp_mask or fetch
- * one from the per-cpu preload buffer.  If @layer_idr is not %NULL, fetch
- * an idr_layer from @idr->id_free.
- *
- * @layer_idr is to maintain backward compatibility with the old alloc
- * interface - idr_pre_get() and idr_get_new*() - and will be removed
- * together with per-pool preload buffer.
- */
-static struct idr_layer *idr_layer_alloc(gfp_t gfp_mask, struct idr *layer_idr)
-{
-	struct idr_layer *new;
-
-	/* this is the old path, bypass to get_from_free_list() */
-	if (layer_idr)
-		return get_from_free_list(layer_idr);
-
-	/*
-	 * Try to allocate directly from kmem_cache.  We want to try this
-	 * before preload buffer; otherwise, non-preloading idr_alloc()
-	 * users will end up taking advantage of preloading ones.  As the
-	 * following is allowed to fail for preloaded cases, suppress
-	 * warning this time.
-	 */
-	new = kmem_cache_zalloc(idr_layer_cache, gfp_mask | __GFP_NOWARN);
-	if (new)
-		return new;
-
-	/*
-	 * Try to fetch one from the per-cpu preload buffer if in process
-	 * context.  See idr_preload() for details.
-	 */
-	if (!in_interrupt()) {
-		preempt_disable();
-		new = __this_cpu_read(idr_preload_head);
-		if (new) {
-			__this_cpu_write(idr_preload_head, new->ary[0]);
-			__this_cpu_dec(idr_preload_cnt);
-			new->ary[0] = NULL;
-		}
-		preempt_enable();
-		if (new)
-			return new;
-	}
-
-	/*
-	 * Both failed.  Try kmem_cache again w/o adding __GFP_NOWARN so
-	 * that memory allocation failure warning is printed as intended.
-	 */
-	return kmem_cache_zalloc(idr_layer_cache, gfp_mask);
-}
-
-static void idr_layer_rcu_free(struct rcu_head *head)
-{
-	struct idr_layer *layer;
-
-	layer = container_of(head, struct idr_layer, rcu_head);
-	kmem_cache_free(idr_layer_cache, layer);
-}
-
-static inline void free_layer(struct idr *idr, struct idr_layer *p)
-{
-	if (idr->hint == p)
-		RCU_INIT_POINTER(idr->hint, NULL);
-	call_rcu(&p->rcu_head, idr_layer_rcu_free);
-}
-
-/* only called when idp->lock is held */
-static void __move_to_free_list(struct idr *idp, struct idr_layer *p)
-{
-	p->ary[0] = idp->id_free;
-	idp->id_free = p;
-	idp->id_free_cnt++;
-}
-
-static void move_to_free_list(struct idr *idp, struct idr_layer *p)
-{
-	unsigned long flags;
-
-	/*
-	 * Depends on the return element being zeroed.
-	 */
-	spin_lock_irqsave(&idp->lock, flags);
-	__move_to_free_list(idp, p);
-	spin_unlock_irqrestore(&idp->lock, flags);
-}
-
-static void idr_mark_full(struct idr_layer **pa, int id)
-{
-	struct idr_layer *p = pa[0];
-	int l = 0;
-
-	__set_bit(id & IDR_MASK, p->bitmap);
-	/*
-	 * If this layer is full mark the bit in the layer above to
-	 * show that this part of the radix tree is full.  This may
-	 * complete the layer above and require walking up the radix
-	 * tree.
-	 */
-	while (bitmap_full(p->bitmap, IDR_SIZE)) {
-		if (!(p = pa[++l]))
-			break;
-		id = id >> IDR_BITS;
-		__set_bit((id & IDR_MASK), p->bitmap);
-	}
-}
-
-static int __idr_pre_get(struct idr *idp, gfp_t gfp_mask)
-{
-	while (idp->id_free_cnt < MAX_IDR_FREE) {
-		struct idr_layer *new;
-		new = kmem_cache_zalloc(idr_layer_cache, gfp_mask);
-		if (new == NULL)
-			return (0);
-		move_to_free_list(idp, new);
-	}
-	return 1;
-}
-
-/**
- * sub_alloc - try to allocate an id without growing the tree depth
- * @idp: idr handle
- * @starting_id: id to start search at
- * @pa: idr_layer[MAX_IDR_LEVEL] used as backtrack buffer
- * @gfp_mask: allocation mask for idr_layer_alloc()
- * @layer_idr: optional idr passed to idr_layer_alloc()
- *
- * Allocate an id in range [@starting_id, INT_MAX] from @idp without
- * growing its depth.  Returns
- *
- *  the allocated id >= 0 if successful,
- *  -EAGAIN if the tree needs to grow for allocation to succeed,
- *  -ENOSPC if the id space is exhausted,
- *  -ENOMEM if more idr_layers need to be allocated.
- */
-static int sub_alloc(struct idr *idp, int *starting_id, struct idr_layer **pa,
-		     gfp_t gfp_mask, struct idr *layer_idr)
-{
-	int n, m, sh;
-	struct idr_layer *p, *new;
-	int l, id, oid;
-
-	id = *starting_id;
- restart:
-	p = idp->top;
-	l = idp->layers;
-	pa[l--] = NULL;
-	while (1) {
-		/*
-		 * We run around this while until we reach the leaf node...
-		 */
-		n = (id >> (IDR_BITS*l)) & IDR_MASK;
-		m = find_next_zero_bit(p->bitmap, IDR_SIZE, n);
-		if (m == IDR_SIZE) {
-			/* no space available go back to previous layer. */
-			l++;
-			oid = id;
-			id = (id | ((1 << (IDR_BITS * l)) - 1)) + 1;
-
-			/* if already at the top layer, we need to grow */
-			if (id > idr_max(idp->layers)) {
-				*starting_id = id;
-				return -EAGAIN;
-			}
-			p = pa[l];
-			BUG_ON(!p);
-
-			/* If we need to go up one layer, continue the
-			 * loop; otherwise, restart from the top.
-			 */
-			sh = IDR_BITS * (l + 1);
-			if (oid >> sh == id >> sh)
-				continue;
-			else
-				goto restart;
-		}
-		if (m != n) {
-			sh = IDR_BITS*l;
-			id = ((id >> sh) ^ n ^ m) << sh;
-		}
-		if ((id >= MAX_IDR_BIT) || (id < 0))
-			return -ENOSPC;
-		if (l == 0)
-			break;
-		/*
-		 * Create the layer below if it is missing.
-		 */
-		if (!p->ary[m]) {
-			new = idr_layer_alloc(gfp_mask, layer_idr);
-			if (!new)
-				return -ENOMEM;
-			new->layer = l-1;
-			new->prefix = id & idr_layer_prefix_mask(new->layer);
-			rcu_assign_pointer(p->ary[m], new);
-			p->count++;
-		}
-		pa[l--] = p;
-		p = p->ary[m];
-	}
-
-	pa[l] = p;
-	return id;
-}
-
-static int idr_get_empty_slot(struct idr *idp, int starting_id,
-			      struct idr_layer **pa, gfp_t gfp_mask,
-			      struct idr *layer_idr)
-{
-	struct idr_layer *p, *new;
-	int layers, v, id;
-	unsigned long flags;
-
-	id = starting_id;
-build_up:
-	p = idp->top;
-	layers = idp->layers;
-	if (unlikely(!p)) {
-		if (!(p = idr_layer_alloc(gfp_mask, layer_idr)))
-			return -ENOMEM;
-		p->layer = 0;
-		layers = 1;
-	}
-	/*
-	 * Add a new layer to the top of the tree if the requested
-	 * id is larger than the currently allocated space.
-	 */
-	while (id > idr_max(layers)) {
-		layers++;
-		if (!p->count) {
-			/* special case: if the tree is currently empty,
-			 * then we grow the tree by moving the top node
-			 * upwards.
-			 */
-			p->layer++;
-			WARN_ON_ONCE(p->prefix);
-			continue;
-		}
-		if (!(new = idr_layer_alloc(gfp_mask, layer_idr))) {
-			/*
-			 * The allocation failed.  If we built part of
-			 * the structure tear it down.
-			 */
-			spin_lock_irqsave(&idp->lock, flags);
-			for (new = p; p && p != idp->top; new = p) {
-				p = p->ary[0];
-				new->ary[0] = NULL;
-				new->count = 0;
-				bitmap_clear(new->bitmap, 0, IDR_SIZE);
-				__move_to_free_list(idp, new);
-			}
-			spin_unlock_irqrestore(&idp->lock, flags);
-			return -ENOMEM;
-		}
-		new->ary[0] = p;
-		new->count = 1;
-		new->layer = layers-1;
-		new->prefix = id & idr_layer_prefix_mask(new->layer);
-		if (bitmap_full(p->bitmap, IDR_SIZE))
-			__set_bit(0, new->bitmap);
-		p = new;
-	}
-	rcu_assign_pointer(idp->top, p);
-	idp->layers = layers;
-	v = sub_alloc(idp, &id, pa, gfp_mask, layer_idr);
-	if (v == -EAGAIN)
-		goto build_up;
-	return(v);
-}
-
-/*
- * @id and @pa are from a successful allocation from idr_get_empty_slot().
- * Install the user pointer @ptr and mark the slot full.
- */
-static void idr_fill_slot(struct idr *idr, void *ptr, int id,
-			  struct idr_layer **pa)
-{
-	/* update hint used for lookup, cleared from free_layer() */
-	rcu_assign_pointer(idr->hint, pa[0]);
-
-	rcu_assign_pointer(pa[0]->ary[id & IDR_MASK], (struct idr_layer *)ptr);
-	pa[0]->count++;
-	idr_mark_full(pa, id);
-}
-
-
-/**
- * idr_preload - preload for idr_alloc()
- * @gfp_mask: allocation mask to use for preloading
- *
- * Preload per-cpu layer buffer for idr_alloc().  Can only be used from
- * process context and each idr_preload() invocation should be matched with
- * idr_preload_end().  Note that preemption is disabled while preloaded.
- *
- * The first idr_alloc() in the preloaded section can be treated as if it
- * were invoked with @gfp_mask used for preloading.  This allows using more
- * permissive allocation masks for idrs protected by spinlocks.
- *
- * For example, if idr_alloc() below fails, the failure can be treated as
- * if idr_alloc() were called with GFP_KERNEL rather than GFP_NOWAIT.
- *
- *	idr_preload(GFP_KERNEL);
- *	spin_lock(lock);
- *
- *	id = idr_alloc(idr, ptr, start, end, GFP_NOWAIT);
- *
- *	spin_unlock(lock);
- *	idr_preload_end();
- *	if (id < 0)
- *		error;
- */
-void idr_preload(gfp_t gfp_mask)
-{
-	/*
-	 * Consuming preload buffer from non-process context breaks preload
-	 * allocation guarantee.  Disallow usage from those contexts.
-	 */
-	WARN_ON_ONCE(in_interrupt());
-	might_sleep_if(gfpflags_allow_blocking(gfp_mask));
-
-	preempt_disable();
-
-	/*
-	 * idr_alloc() is likely to succeed w/o full idr_layer buffer and
-	 * return value from idr_alloc() needs to be checked for failure
-	 * anyway.  Silently give up if allocation fails.  The caller can
-	 * treat failures from idr_alloc() as if idr_alloc() were called
-	 * with @gfp_mask which should be enough.
-	 */
-	while (__this_cpu_read(idr_preload_cnt) < MAX_IDR_FREE) {
-		struct idr_layer *new;
-
-		preempt_enable();
-		new = kmem_cache_zalloc(idr_layer_cache, gfp_mask);
-		preempt_disable();
-		if (!new)
-			break;
-
-		/* link the new one to per-cpu preload list */
-		new->ary[0] = __this_cpu_read(idr_preload_head);
-		__this_cpu_write(idr_preload_head, new);
-		__this_cpu_inc(idr_preload_cnt);
-	}
-}
-EXPORT_SYMBOL(idr_preload);
-
 /**
- * idr_alloc - allocate new idr entry
- * @idr: the (initialized) idr
+ * idr_alloc - allocate an id
+ * @idr: idr handle
  * @ptr: pointer to be associated with the new id
  * @start: the minimum id (inclusive)
- * @end: the maximum id (exclusive, <= 0 for max)
- * @gfp_mask: memory allocation flags
+ * @end: the maximum id (exclusive)
+ * @gfp: memory allocation flags
  *
- * Allocate an id in [start, end) and associate it with @ptr.  If no ID is
- * available in the specified range, returns -ENOSPC.  On memory allocation
- * failure, returns -ENOMEM.
+ * Allocates an unused ID in the range [start, end).  Returns -ENOSPC
+ * if there are no unused IDs in that range.
  *
  * Note that @end is treated as max when <= 0.  This is to always allow
  * using @start + N as @end as long as N is inside integer range.
  *
- * The user is responsible for exclusively synchronizing all operations
- * which may modify @idr.  However, read-only accesses such as idr_find()
- * or iteration can be performed under RCU read lock provided the user
- * destroys @ptr in RCU-safe way after removal from idr.
+ * Simultaneous modifications to the @idr are not allowed and should be
+ * prevented by the user, usually with a lock.  idr_alloc() may be called
+ * concurrently with read-only accesses to the @idr, such as idr_find() and
+ * idr_for_each_entry().
  */
-int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask)
+int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp)
 {
-	int max = end > 0 ? end - 1 : INT_MAX;	/* inclusive upper limit */
-	struct idr_layer *pa[MAX_IDR_LEVEL + 1];
-	int id;
-
-	might_sleep_if(gfpflags_allow_blocking(gfp_mask));
+	void **slot;
+	struct radix_tree_iter iter;
 
-	/* sanity checks */
 	if (WARN_ON_ONCE(start < 0))
 		return -EINVAL;
-	if (unlikely(max < start))
-		return -ENOSPC;
+	if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr)))
+		return -EINVAL;
 
-	/* allocate id */
-	id = idr_get_empty_slot(idr, start, pa, gfp_mask, NULL);
-	if (unlikely(id < 0))
-		return id;
-	if (unlikely(id > max))
-		return -ENOSPC;
+	radix_tree_iter_init(&iter, start);
+	slot = idr_get_free(&idr->idr_rt, &iter, gfp, end);
+	if (IS_ERR(slot))
+		return PTR_ERR(slot);
 
-	idr_fill_slot(idr, ptr, id, pa);
-	return id;
+	radix_tree_iter_replace(&idr->idr_rt, &iter, slot, ptr);
+	radix_tree_iter_tag_clear(&idr->idr_rt, &iter, IDR_FREE);
+	return iter.index;
 }
 EXPORT_SYMBOL_GPL(idr_alloc);
 
 /**
  * idr_alloc_cyclic - allocate new idr entry in a cyclical fashion
- * @idr: the (initialized) idr
+ * @idr: idr handle
  * @ptr: pointer to be associated with the new id
  * @start: the minimum id (inclusive)
- * @end: the maximum id (exclusive, <= 0 for max)
- * @gfp_mask: memory allocation flags
- *
- * Essentially the same as idr_alloc, but prefers to allocate progressively
- * higher ids if it can. If the "cur" counter wraps, then it will start again
- * at the "start" end of the range and allocate one that has already been used.
- */
-int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end,
-			gfp_t gfp_mask)
-{
-	int id;
-
-	id = idr_alloc(idr, ptr, max(start, idr->cur), end, gfp_mask);
-	if (id == -ENOSPC)
-		id = idr_alloc(idr, ptr, start, end, gfp_mask);
-
-	if (likely(id >= 0))
-		idr->cur = id + 1;
-	return id;
-}
-EXPORT_SYMBOL(idr_alloc_cyclic);
-
-static void idr_remove_warning(int id)
-{
-	WARN(1, "idr_remove called for id=%d which is not allocated.\n", id);
-}
-
-static void sub_remove(struct idr *idp, int shift, int id)
-{
-	struct idr_layer *p = idp->top;
-	struct idr_layer **pa[MAX_IDR_LEVEL + 1];
-	struct idr_layer ***paa = &pa[0];
-	struct idr_layer *to_free;
-	int n;
-
-	*paa = NULL;
-	*++paa = &idp->top;
-
-	while ((shift > 0) && p) {
-		n = (id >> shift) & IDR_MASK;
-		__clear_bit(n, p->bitmap);
-		*++paa = &p->ary[n];
-		p = p->ary[n];
-		shift -= IDR_BITS;
-	}
-	n = id & IDR_MASK;
-	if (likely(p != NULL && test_bit(n, p->bitmap))) {
-		__clear_bit(n, p->bitmap);
-		RCU_INIT_POINTER(p->ary[n], NULL);
-		to_free = NULL;
-		while(*paa && ! --((**paa)->count)){
-			if (to_free)
-				free_layer(idp, to_free);
-			to_free = **paa;
-			**paa-- = NULL;
-		}
-		if (!*paa)
-			idp->layers = 0;
-		if (to_free)
-			free_layer(idp, to_free);
-	} else
-		idr_remove_warning(id);
-}
-
-/**
- * idr_remove - remove the given id and free its slot
- * @idp: idr handle
- * @id: unique key
- */
-void idr_remove(struct idr *idp, int id)
-{
-	struct idr_layer *p;
-	struct idr_layer *to_free;
-
-	if (id < 0)
-		return;
-
-	if (id > idr_max(idp->layers)) {
-		idr_remove_warning(id);
-		return;
-	}
-
-	sub_remove(idp, (idp->layers - 1) * IDR_BITS, id);
-	if (idp->top && idp->top->count == 1 && (idp->layers > 1) &&
-	    idp->top->ary[0]) {
-		/*
-		 * Single child at leftmost slot: we can shrink the tree.
-		 * This level is not needed anymore since when layers are
-		 * inserted, they are inserted at the top of the existing
-		 * tree.
-		 */
-		to_free = idp->top;
-		p = idp->top->ary[0];
-		rcu_assign_pointer(idp->top, p);
-		--idp->layers;
-		to_free->count = 0;
-		bitmap_clear(to_free->bitmap, 0, IDR_SIZE);
-		free_layer(idp, to_free);
-	}
-}
-EXPORT_SYMBOL(idr_remove);
-
-static void __idr_remove_all(struct idr *idp)
-{
-	int n, id, max;
-	int bt_mask;
-	struct idr_layer *p;
-	struct idr_layer *pa[MAX_IDR_LEVEL + 1];
-	struct idr_layer **paa = &pa[0];
-
-	n = idp->layers * IDR_BITS;
-	*paa = idp->top;
-	RCU_INIT_POINTER(idp->top, NULL);
-	max = idr_max(idp->layers);
-
-	id = 0;
-	while (id >= 0 && id <= max) {
-		p = *paa;
-		while (n > IDR_BITS && p) {
-			n -= IDR_BITS;
-			p = p->ary[(id >> n) & IDR_MASK];
-			*++paa = p;
-		}
-
-		bt_mask = id;
-		id += 1 << n;
-		/* Get the highest bit that the above add changed from 0->1. */
-		while (n < fls(id ^ bt_mask)) {
-			if (*paa)
-				free_layer(idp, *paa);
-			n += IDR_BITS;
-			--paa;
-		}
-	}
-	idp->layers = 0;
-}
-
-/**
- * idr_destroy - release all cached layers within an idr tree
- * @idp: idr handle
- *
- * Free all id mappings and all idp_layers.  After this function, @idp is
- * completely unused and can be freed / recycled.  The caller is
- * responsible for ensuring that no one else accesses @idp during or after
- * idr_destroy().
+ * @end: the maximum id (exclusive)
+ * @gfp: memory allocation flags
  *
- * A typical clean-up sequence for objects stored in an idr tree will use
- * idr_for_each() to free all objects, if necessary, then idr_destroy() to
- * free up the id mappings and cached idr_layers.
+ * Allocates an ID larger than the last ID allocated if one is available.
+ * If not, it will attempt to allocate the smallest ID that is larger or
+ * equal to @start.
  */
-void idr_destroy(struct idr *idp)
-{
-	__idr_remove_all(idp);
-
-	while (idp->id_free_cnt) {
-		struct idr_layer *p = get_from_free_list(idp);
-		kmem_cache_free(idr_layer_cache, p);
-	}
-}
-EXPORT_SYMBOL(idr_destroy);
-
-void *idr_find_slowpath(struct idr *idp, int id)
+int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end, gfp_t gfp)
 {
-	int n;
-	struct idr_layer *p;
+	int id, curr = idr->idr_next;
 
-	if (id < 0)
-		return NULL;
+	if (curr < start)
+		curr = start;
 
-	p = rcu_dereference_raw(idp->top);
-	if (!p)
-		return NULL;
-	n = (p->layer+1) * IDR_BITS;
+	id = idr_alloc(idr, ptr, curr, end, gfp);
+	if ((id == -ENOSPC) && (curr > start))
+		id = idr_alloc(idr, ptr, start, curr, gfp);
 
-	if (id > idr_max(p->layer + 1))
-		return NULL;
-	BUG_ON(n == 0);
+	if (id >= 0)
+		idr->idr_next = id + 1U;
 
-	while (n > 0 && p) {
-		n -= IDR_BITS;
-		BUG_ON(n != p->layer*IDR_BITS);
-		p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
-	}
-	return((void *)p);
+	return id;
 }
-EXPORT_SYMBOL(idr_find_slowpath);
+EXPORT_SYMBOL(idr_alloc_cyclic);
 
 /**
  * idr_for_each - iterate through all stored pointers
- * @idp: idr handle
+ * @idr: idr handle
  * @fn: function to be called for each pointer
- * @data: data passed back to callback function
+ * @data: data passed to callback function
  *
- * Iterate over the pointers registered with the given idr.  The
- * callback function will be called for each pointer currently
- * registered, passing the id, the pointer and the data pointer passed
- * to this function.  It is not safe to modify the idr tree while in
- * the callback, so functions such as idr_get_new and idr_remove are
- * not allowed.
+ * The callback function will be called for each entry in @idr, passing
+ * the id, the pointer and the data pointer passed to this function.
  *
- * We check the return of @fn each time. If it returns anything other
- * than %0, we break out and return that value.
+ * If @fn returns anything other than %0, the iteration stops and that
+ * value is returned from this function.
  *
- * The caller must serialize idr_for_each() vs idr_get_new() and idr_remove().
+ * idr_for_each() can be called concurrently with idr_alloc() and
+ * idr_remove() if protected by RCU.  Newly added entries may not be
+ * seen and deleted entries may be seen, but adding and removing entries
+ * will not cause other entries to be skipped, nor spurious ones to be seen.
  */
-int idr_for_each(struct idr *idp,
-		 int (*fn)(int id, void *p, void *data), void *data)
+int idr_for_each(const struct idr *idr,
+		int (*fn)(int id, void *p, void *data), void *data)
 {
-	int n, id, max, error = 0;
-	struct idr_layer *p;
-	struct idr_layer *pa[MAX_IDR_LEVEL + 1];
-	struct idr_layer **paa = &pa[0];
-
-	n = idp->layers * IDR_BITS;
-	*paa = rcu_dereference_raw(idp->top);
-	max = idr_max(idp->layers);
-
-	id = 0;
-	while (id >= 0 && id <= max) {
-		p = *paa;
-		while (n > 0 && p) {
-			n -= IDR_BITS;
-			p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
-			*++paa = p;
-		}
-
-		if (p) {
-			error = fn(id, (void *)p, data);
-			if (error)
-				break;
-		}
+	struct radix_tree_iter iter;
+	void **slot;
 
-		id += 1 << n;
-		while (n < fls(id)) {
-			n += IDR_BITS;
-			--paa;
-		}
+	radix_tree_for_each_slot(slot, &idr->idr_rt, &iter, 0) {
+		int ret = fn(iter.index, rcu_dereference_raw(*slot), data);
+		if (ret)
+			return ret;
 	}
 
-	return error;
+	return 0;
 }
 EXPORT_SYMBOL(idr_for_each);
 
 /**
- * idr_get_next - lookup next object of id to given id.
- * @idp: idr handle
- * @nextidp:  pointer to lookup key
- *
- * Returns pointer to registered object with id, which is next number to
- * given id. After being looked up, *@nextidp will be updated for the next
- * iteration.
- *
- * This function can be called under rcu_read_lock(), given that the leaf
- * pointers lifetimes are correctly managed.
+ * idr_get_next - Find next populated entry
+ * @idr: idr handle
+ * @nextid: Pointer to lowest possible ID to return
+ *
+ * Returns the next populated entry in the tree with an ID greater than
+ * or equal to the value pointed to by @nextid.  On exit, @nextid is updated
+ * to the ID of the found value.  To use in a loop, the value pointed to by
+ * nextid must be incremented by the user.
  */
-void *idr_get_next(struct idr *idp, int *nextidp)
+void *idr_get_next(struct idr *idr, int *nextid)
 {
-	struct idr_layer *p, *pa[MAX_IDR_LEVEL + 1];
-	struct idr_layer **paa = &pa[0];
-	int id = *nextidp;
-	int n, max;
+	struct radix_tree_iter iter;
+	void **slot;
 
-	/* find first ent */
-	p = *paa = rcu_dereference_raw(idp->top);
-	if (!p)
+	slot = radix_tree_iter_find(&idr->idr_rt, &iter, *nextid);
+	if (!slot)
 		return NULL;
-	n = (p->layer + 1) * IDR_BITS;
-	max = idr_max(p->layer + 1);
-
-	while (id >= 0 && id <= max) {
-		p = *paa;
-		while (n > 0 && p) {
-			n -= IDR_BITS;
-			p = rcu_dereference_raw(p->ary[(id >> n) & IDR_MASK]);
-			*++paa = p;
-		}
 
-		if (p) {
-			*nextidp = id;
-			return p;
-		}
-
-		/*
-		 * Proceed to the next layer at the current level.  Unlike
-		 * idr_for_each(), @id isn't guaranteed to be aligned to
-		 * layer boundary at this point and adding 1 << n may
-		 * incorrectly skip IDs.  Make sure we jump to the
-		 * beginning of the next layer using round_up().
-		 */
-		id = round_up(id + 1, 1 << n);
-		while (n < fls(id)) {
-			n += IDR_BITS;
-			--paa;
-		}
-	}
-	return NULL;
+	*nextid = iter.index;
+	return rcu_dereference_raw(*slot);
 }
 EXPORT_SYMBOL(idr_get_next);
 
-
 /**
  * idr_replace - replace pointer for given id
- * @idp: idr handle
- * @ptr: pointer you want associated with the id
- * @id: lookup key
+ * @idr: idr handle
+ * @ptr: New pointer to associate with the ID
+ * @id: Lookup key
  *
- * Replace the pointer registered with an id and return the old value.
- * A %-ENOENT return indicates that @id was not found.
- * A %-EINVAL return indicates that @id was not within valid constraints.
+ * Replace the pointer registered with an ID and return the old value.
+ * This function can be called under the RCU read lock concurrently with
+ * idr_alloc() and idr_remove() (as long as the ID being removed is not
+ * the one being replaced!).
  *
- * The caller must serialize with writers.
+ * Returns: 0 on success.  %-ENOENT indicates that @id was not found.
+ * %-EINVAL indicates that @id or @ptr were not valid.
  */
-void *idr_replace(struct idr *idp, void *ptr, int id)
+void *idr_replace(struct idr *idr, void *ptr, int id)
 {
-	int n;
-	struct idr_layer *p, *old_p;
+	struct radix_tree_node *node;
+	void **slot = NULL;
+	void *entry;
 
-	if (id < 0)
+	if (WARN_ON_ONCE(id < 0))
+		return ERR_PTR(-EINVAL);
+	if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr)))
 		return ERR_PTR(-EINVAL);
 
-	p = idp->top;
-	if (!p)
-		return ERR_PTR(-ENOENT);
-
-	if (id > idr_max(p->layer + 1))
-		return ERR_PTR(-ENOENT);
-
-	n = p->layer * IDR_BITS;
-	while ((n > 0) && p) {
-		p = p->ary[(id >> n) & IDR_MASK];
-		n -= IDR_BITS;
-	}
-
-	n = id & IDR_MASK;
-	if (unlikely(p == NULL || !test_bit(n, p->bitmap)))
+	entry = __radix_tree_lookup(&idr->idr_rt, id, &node, &slot);
+	if (!slot || radix_tree_tag_get(&idr->idr_rt, id, IDR_FREE))
 		return ERR_PTR(-ENOENT);
 
-	old_p = p->ary[n];
-	rcu_assign_pointer(p->ary[n], ptr);
+	__radix_tree_replace(&idr->idr_rt, node, slot, ptr, NULL, NULL);
 
-	return old_p;
+	return entry;
 }
 EXPORT_SYMBOL(idr_replace);
 
-void __init idr_init_cache(void)
-{
-	idr_layer_cache = kmem_cache_create("idr_layer_cache",
-				sizeof(struct idr_layer), 0, SLAB_PANIC, NULL);
-}
-
-/**
- * idr_init - initialize idr handle
- * @idp:	idr handle
- *
- * This function is use to set up the handle (@idp) that you will pass
- * to the rest of the functions.
- */
-void idr_init(struct idr *idp)
-{
-	memset(idp, 0, sizeof(struct idr));
-	spin_lock_init(&idp->lock);
-}
-EXPORT_SYMBOL(idr_init);
-
-static int idr_has_entry(int id, void *p, void *data)
-{
-	return 1;
-}
-
-bool idr_is_empty(struct idr *idp)
-{
-	return !idr_for_each(idp, idr_has_entry, NULL);
-}
-EXPORT_SYMBOL(idr_is_empty);
-
 /**
  * DOC: IDA description
- * IDA - IDR based ID allocator
- *
- * This is id allocator without id -> pointer translation.  Memory
- * usage is much lower than full blown idr because each id only
- * occupies a bit.  ida uses a custom leaf node which contains
- * IDA_BITMAP_BITS slots.
  *
- * 2007-04-25  written by Tejun Heo <htejun@gmail.com>
+ * The IDA is an ID allocator which does not provide the ability to
+ * associate an ID with a pointer.  As such, it only needs to store one
+ * bit per ID, and so is more space efficient than an IDR.  To use an IDA,
+ * define it using DEFINE_IDA() (or embed a &struct ida in a data structure,
+ * then initialise it using ida_init()).  To allocate a new ID, call
+ * ida_simple_get().  To free an ID, call ida_simple_remove().
+ *
+ * If you have more complex locking requirements, use a loop around
+ * ida_pre_get() and ida_get_new() to allocate a new ID.  Then use
+ * ida_remove() to free an ID.  You must make sure that ida_get_new() and
+ * ida_remove() cannot be called at the same time as each other for the
+ * same IDA.
+ *
+ * You can also use ida_get_new_above() if you need an ID to be allocated
+ * above a particular number.  ida_destroy() can be used to dispose of an
+ * IDA without needing to free the individual IDs in it.  You can use
+ * ida_is_empty() to find out whether the IDA has any IDs currently allocated.
+ *
+ * IDs are currently limited to the range [0-INT_MAX].  If this is an awkward
+ * limitation, it should be quite straightforward to raise the maximum.
  */
 
-static void free_bitmap(struct ida *ida, struct ida_bitmap *bitmap)
-{
-	unsigned long flags;
-
-	if (!ida->free_bitmap) {
-		spin_lock_irqsave(&ida->idr.lock, flags);
-		if (!ida->free_bitmap) {
-			ida->free_bitmap = bitmap;
-			bitmap = NULL;
-		}
-		spin_unlock_irqrestore(&ida->idr.lock, flags);
-	}
-
-	kfree(bitmap);
-}
-
 /**
  * ida_pre_get - reserve resources for ida allocation
- * @ida:	ida handle
- * @gfp_mask:	memory allocation flag
- *
- * This function should be called prior to locking and calling the
- * following function.  It preallocates enough memory to satisfy the
- * worst possible allocation.
+ * @ida: ida handle
+ * @gfp: memory allocation flags
  *
- * If the system is REALLY out of memory this function returns %0,
- * otherwise %1.
+ * This function should be called before calling ida_get_new_above().  If it
+ * is unable to allocate memory, it will return %0.  On success, it returns %1.
  */
-int ida_pre_get(struct ida *ida, gfp_t gfp_mask)
+int ida_pre_get(struct ida *ida, gfp_t gfp)
 {
-	/* allocate idr_layers */
-	if (!__idr_pre_get(&ida->idr, gfp_mask))
-		return 0;
+	struct ida_bitmap *bitmap;
 
-	/* allocate free_bitmap */
-	if (!ida->free_bitmap) {
-		struct ida_bitmap *bitmap;
+	/*
+	 * This looks weird, but the IDA API has no preload_end() equivalent.
+	 * Instead, ida_get_new() can return -EAGAIN, prompting the caller
+	 * to return to the ida_pre_get() step.
+	 */
+	idr_preload(gfp);
+	idr_preload_end();
 
-		bitmap = kmalloc(sizeof(struct ida_bitmap), gfp_mask);
+	if (!ida->free_bitmap) {
+		bitmap = kmalloc(sizeof(struct ida_bitmap), gfp);
 		if (!bitmap)
 			return 0;
-
-		free_bitmap(ida, bitmap);
+		bitmap = xchg(&ida->free_bitmap, bitmap);
+		kfree(bitmap);
 	}
 
 	return 1;
 }
 EXPORT_SYMBOL(ida_pre_get);
 
+#define IDA_MAX (0x80000000U / IDA_BITMAP_BITS)
+
 /**
  * ida_get_new_above - allocate new ID above or equal to a start id
- * @ida:	ida handle
- * @starting_id: id to start search at
- * @p_id:	pointer to the allocated handle
+ * @ida: ida handle
+ * @start: id to start search at
+ * @id: pointer to the allocated handle
  *
- * Allocate new ID above or equal to @starting_id.  It should be called
- * with any required locks.
+ * Allocate new ID above or equal to @start.  It should be called
+ * with any required locks to ensure that concurrent calls to
+ * ida_get_new_above() / ida_get_new() / ida_remove() are not allowed.
+ * Consider using ida_simple_get() if you do not have complex locking
+ * requirements.
  *
  * If memory is required, it will return %-EAGAIN, you should unlock
  * and go back to the ida_pre_get() call.  If the ida is full, it will
- * return %-ENOSPC.
- *
- * Note that callers must ensure that concurrent access to @ida is not possible.
- * See ida_simple_get() for a varaint which takes care of locking.
+ * return %-ENOSPC.  On success, it will return 0.
  *
- * @p_id returns a value in the range @starting_id ... %0x7fffffff.
+ * @id returns a value in the range @start ... %0x7fffffff.
  */
-int ida_get_new_above(struct ida *ida, int starting_id, int *p_id)
+int ida_get_new_above(struct ida *ida, int start, int *id)
 {
-	struct idr_layer *pa[MAX_IDR_LEVEL + 1];
+	struct radix_tree_root *root = &ida->ida_rt;
+	void **slot;
+	struct radix_tree_iter iter;
 	struct ida_bitmap *bitmap;
-	unsigned long flags;
-	int idr_id = starting_id / IDA_BITMAP_BITS;
-	int offset = starting_id % IDA_BITMAP_BITS;
-	int t, id;
-
- restart:
-	/* get vacant slot */
-	t = idr_get_empty_slot(&ida->idr, idr_id, pa, 0, &ida->idr);
-	if (t < 0)
-		return t == -ENOMEM ? -EAGAIN : t;
-
-	if (t * IDA_BITMAP_BITS >= MAX_IDR_BIT)
-		return -ENOSPC;
-
-	if (t != idr_id)
-		offset = 0;
-	idr_id = t;
-
-	/* if bitmap isn't there, create a new one */
-	bitmap = (void *)pa[0]->ary[idr_id & IDR_MASK];
-	if (!bitmap) {
-		spin_lock_irqsave(&ida->idr.lock, flags);
-		bitmap = ida->free_bitmap;
-		ida->free_bitmap = NULL;
-		spin_unlock_irqrestore(&ida->idr.lock, flags);
-
-		if (!bitmap)
-			return -EAGAIN;
-
-		memset(bitmap, 0, sizeof(struct ida_bitmap));
-		rcu_assign_pointer(pa[0]->ary[idr_id & IDR_MASK],
-				(void *)bitmap);
-		pa[0]->count++;
-	}
-
-	/* lookup for empty slot */
-	t = find_next_zero_bit(bitmap->bitmap, IDA_BITMAP_BITS, offset);
-	if (t == IDA_BITMAP_BITS) {
-		/* no empty slot after offset, continue to the next chunk */
-		idr_id++;
-		offset = 0;
-		goto restart;
-	}
-
-	id = idr_id * IDA_BITMAP_BITS + t;
-	if (id >= MAX_IDR_BIT)
-		return -ENOSPC;
-
-	__set_bit(t, bitmap->bitmap);
-	if (++bitmap->nr_busy == IDA_BITMAP_BITS)
-		idr_mark_full(pa, idr_id);
+	unsigned long index;
+	unsigned bit;
+	int new;
+
+	index = start / IDA_BITMAP_BITS;
+	bit = start % IDA_BITMAP_BITS;
+
+	slot = radix_tree_iter_init(&iter, index);
+	for (;;) {
+		if (slot)
+			slot = radix_tree_next_slot(slot, &iter,
+						RADIX_TREE_ITER_TAGGED);
+		if (!slot) {
+			slot = idr_get_free(root, &iter, GFP_NOWAIT, IDA_MAX);
+			if (IS_ERR(slot)) {
+				if (slot == ERR_PTR(-ENOMEM))
+					return -EAGAIN;
+				return PTR_ERR(slot);
+			}
+		}
+		if (iter.index > index)
+			bit = 0;
+		new = iter.index * IDA_BITMAP_BITS;
+		bitmap = rcu_dereference_raw(*slot);
+		if (bitmap) {
+			bit = find_next_zero_bit(bitmap->bitmap,
+							IDA_BITMAP_BITS, bit);
+			new += bit;
+			if (new < 0)
+				return -ENOSPC;
+			if (bit == IDA_BITMAP_BITS)
+				continue;
 
-	*p_id = id;
+			__set_bit(bit, bitmap->bitmap);
+			if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS))
+				radix_tree_iter_tag_clear(root, &iter,
+								IDR_FREE);
+		} else {
+			new += bit;
+			if (new < 0)
+				return -ENOSPC;
+			bitmap = ida->free_bitmap;
+			if (!bitmap)
+				return -EAGAIN;
+			ida->free_bitmap = NULL;
+			memset(bitmap, 0, sizeof(*bitmap));
+			__set_bit(bit, bitmap->bitmap);
+			radix_tree_iter_replace(root, &iter, slot, bitmap);
+		}
 
-	/* Each leaf node can handle nearly a thousand slots and the
-	 * whole idea of ida is to have small memory foot print.
-	 * Throw away extra resources one by one after each successful
-	 * allocation.
-	 */
-	if (ida->idr.id_free_cnt || ida->free_bitmap) {
-		struct idr_layer *p = get_from_free_list(&ida->idr);
-		if (p)
-			kmem_cache_free(idr_layer_cache, p);
+		*id = new;
+		return 0;
 	}
-
-	return 0;
 }
 EXPORT_SYMBOL(ida_get_new_above);
 
 /**
- * ida_remove - remove the given ID
- * @ida:	ida handle
- * @id:		ID to free
+ * ida_remove - Free the given ID
+ * @ida: ida handle
+ * @id: ID to free
+ *
+ * This function should not be called at the same time as ida_get_new_above().
  */
 void ida_remove(struct ida *ida, int id)
 {
-	struct idr_layer *p = ida->idr.top;
-	int shift = (ida->idr.layers - 1) * IDR_BITS;
-	int idr_id = id / IDA_BITMAP_BITS;
-	int offset = id % IDA_BITMAP_BITS;
-	int n;
+	unsigned long index = id / IDA_BITMAP_BITS;
+	unsigned offset = id % IDA_BITMAP_BITS;
 	struct ida_bitmap *bitmap;
+	struct radix_tree_iter iter;
+	void **slot;
 
-	if (idr_id > idr_max(ida->idr.layers))
+	slot = radix_tree_iter_lookup(&ida->ida_rt, &iter, index);
+	if (!slot)
 		goto err;
 
-	/* clear full bits while looking up the leaf idr_layer */
-	while ((shift > 0) && p) {
-		n = (idr_id >> shift) & IDR_MASK;
-		__clear_bit(n, p->bitmap);
-		p = p->ary[n];
-		shift -= IDR_BITS;
-	}
-
-	if (p == NULL)
-		goto err;
-
-	n = idr_id & IDR_MASK;
-	__clear_bit(n, p->bitmap);
-
-	bitmap = (void *)p->ary[n];
-	if (!bitmap || !test_bit(offset, bitmap->bitmap))
+	bitmap = rcu_dereference_raw(*slot);
+	if (!test_bit(offset, bitmap->bitmap))
 		goto err;
 
-	/* update bitmap and remove it if empty */
 	__clear_bit(offset, bitmap->bitmap);
-	if (--bitmap->nr_busy == 0) {
-		__set_bit(n, p->bitmap);	/* to please idr_remove() */
-		idr_remove(&ida->idr, idr_id);
-		free_bitmap(ida, bitmap);
+	radix_tree_iter_tag_set(&ida->ida_rt, &iter, IDR_FREE);
+	if (bitmap_empty(bitmap->bitmap, IDA_BITMAP_BITS)) {
+		kfree(bitmap);
+		radix_tree_iter_delete(&ida->ida_rt, &iter, slot);
 	}
-
 	return;
-
  err:
 	WARN(1, "ida_remove called for id=%d which is not allocated.\n", id);
 }
 EXPORT_SYMBOL(ida_remove);
 
 /**
- * ida_destroy - release all cached layers within an ida tree
- * @ida:		ida handle
+ * ida_destroy - Free the contents of an ida
+ * @ida: ida handle
+ *
+ * Calling this function releases all resources associated with an IDA.  When
+ * this call returns, the IDA is empty and can be reused or freed.  The caller
+ * should not allow ida_remove() or ida_get_new_above() to be called at the
+ * same time.
  */
 void ida_destroy(struct ida *ida)
 {
-	idr_destroy(&ida->idr);
+	struct radix_tree_iter iter;
+	void **slot;
+
+	radix_tree_for_each_slot(slot, &ida->ida_rt, &iter, 0) {
+		struct ida_bitmap *bitmap = rcu_dereference_raw(*slot);
+		kfree(bitmap);
+		radix_tree_iter_delete(&ida->ida_rt, &iter, slot);
+	}
+
 	kfree(ida->free_bitmap);
+	ida->free_bitmap = NULL;
 }
 EXPORT_SYMBOL(ida_destroy);
 
@@ -1141,18 +442,3 @@ void ida_simple_remove(struct ida *ida, unsigned int id)
 	spin_unlock_irqrestore(&simple_ida_lock, flags);
 }
 EXPORT_SYMBOL(ida_simple_remove);
-
-/**
- * ida_init - initialize ida handle
- * @ida:	ida handle
- *
- * This function is use to set up the handle (@ida) that you will pass
- * to the rest of the functions.
- */
-void ida_init(struct ida *ida)
-{
-	memset(ida, 0, sizeof(struct ida));
-	idr_init(&ida->idr);
-
-}
-EXPORT_SYMBOL(ida_init);
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 7bf7d4e60e43..eaea14b8f2ca 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -22,20 +22,21 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
 #include <linux/cpu.h>
 #include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/idr.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
-#include <linux/export.h>
-#include <linux/radix-tree.h>
+#include <linux/kmemleak.h>
 #include <linux/percpu.h>
+#include <linux/preempt.h>		/* in_interrupt() */
+#include <linux/radix-tree.h>
+#include <linux/rcupdate.h>
 #include <linux/slab.h>
-#include <linux/kmemleak.h>
-#include <linux/cpu.h>
 #include <linux/string.h>
-#include <linux/bitops.h>
-#include <linux/rcupdate.h>
-#include <linux/preempt.h>		/* in_interrupt() */
 
 
 /* Number of nodes in fully populated tree of given height */
@@ -59,6 +60,15 @@ static struct kmem_cache *radix_tree_node_cachep;
  */
 #define RADIX_TREE_PRELOAD_SIZE (RADIX_TREE_MAX_PATH * 2 - 1)
 
+/*
+ * The IDR does not have to be as high as the radix tree since it uses
+ * signed integers, not unsigned longs.
+ */
+#define IDR_INDEX_BITS		(8 /* CHAR_BIT */ * sizeof(int) - 1)
+#define IDR_MAX_PATH		(DIV_ROUND_UP(IDR_INDEX_BITS, \
+						RADIX_TREE_MAP_SHIFT))
+#define IDR_PRELOAD_SIZE	(IDR_MAX_PATH * 2 - 1)
+
 /*
  * Per-cpu pool of preloaded nodes
  */
@@ -149,27 +159,32 @@ static inline int tag_get(const struct radix_tree_node *node, unsigned int tag,
 
 static inline void root_tag_set(struct radix_tree_root *root, unsigned tag)
 {
-	root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT));
+	root->gfp_mask |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT));
 }
 
 static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag)
 {
-	root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT));
+	root->gfp_mask &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT));
 }
 
 static inline void root_tag_clear_all(struct radix_tree_root *root)
 {
-	root->gfp_mask &= __GFP_BITS_MASK;
+	root->gfp_mask &= (1 << ROOT_TAG_SHIFT) - 1;
 }
 
 static inline int root_tag_get(const struct radix_tree_root *root, unsigned tag)
 {
-	return (__force int)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT));
+	return (__force int)root->gfp_mask & (1 << (tag + ROOT_TAG_SHIFT));
 }
 
 static inline unsigned root_tags_get(const struct radix_tree_root *root)
 {
-	return (__force unsigned)root->gfp_mask >> __GFP_BITS_SHIFT;
+	return (__force unsigned)root->gfp_mask >> ROOT_TAG_SHIFT;
+}
+
+static inline bool is_idr(const struct radix_tree_root *root)
+{
+	return !!(root->gfp_mask & ROOT_IS_IDR);
 }
 
 /*
@@ -187,6 +202,11 @@ static inline int any_tag_set(const struct radix_tree_node *node,
 	return 0;
 }
 
+static inline void all_tag_set(struct radix_tree_node *node, unsigned int tag)
+{
+	bitmap_fill(node->tags[tag], RADIX_TREE_MAP_SIZE);
+}
+
 /**
  * radix_tree_find_next_bit - find the next set bit in a memory region
  *
@@ -240,6 +260,13 @@ static inline unsigned long node_maxindex(const struct radix_tree_node *node)
 	return shift_maxindex(node->shift);
 }
 
+static unsigned long next_index(unsigned long index,
+				const struct radix_tree_node *node,
+				unsigned long offset)
+{
+	return (index & ~node_maxindex(node)) + (offset << node->shift);
+}
+
 #ifndef __KERNEL__
 static void dump_node(struct radix_tree_node *node, unsigned long index)
 {
@@ -278,11 +305,52 @@ static void radix_tree_dump(struct radix_tree_root *root)
 {
 	pr_debug("radix root: %p rnode %p tags %x\n",
 			root, root->rnode,
-			root->gfp_mask >> __GFP_BITS_SHIFT);
+			root->gfp_mask >> ROOT_TAG_SHIFT);
 	if (!radix_tree_is_internal_node(root->rnode))
 		return;
 	dump_node(entry_to_node(root->rnode), 0);
 }
+
+static void dump_ida_node(void *entry, unsigned long index)
+{
+	unsigned long i;
+
+	if (!entry)
+		return;
+
+	if (radix_tree_is_internal_node(entry)) {
+		struct radix_tree_node *node = entry_to_node(entry);
+
+		pr_debug("ida node: %p offset %d indices %lu-%lu parent %p free %lx shift %d count %d\n",
+			node, node->offset, index * IDA_BITMAP_BITS,
+			((index | node_maxindex(node)) + 1) *
+				IDA_BITMAP_BITS - 1,
+			node->parent, node->tags[0][0], node->shift,
+			node->count);
+		for (i = 0; i < RADIX_TREE_MAP_SIZE; i++)
+			dump_ida_node(node->slots[i],
+					index | (i << node->shift));
+	} else {
+		struct ida_bitmap *bitmap = entry;
+
+		pr_debug("ida btmp: %p offset %d indices %lu-%lu data", bitmap,
+				(int)(index & RADIX_TREE_MAP_MASK),
+				index * IDA_BITMAP_BITS,
+				(index + 1) * IDA_BITMAP_BITS - 1);
+		for (i = 0; i < IDA_BITMAP_LONGS; i++)
+			pr_cont(" %lx", bitmap->bitmap[i]);
+		pr_cont("\n");
+	}
+}
+
+static void ida_dump(struct ida *ida)
+{
+	struct radix_tree_root *root = &ida->ida_rt;
+	pr_debug("ida: %p %p free %d bitmap %p\n", ida, root->rnode,
+				root->gfp_mask >> ROOT_TAG_SHIFT,
+				ida->free_bitmap);
+	dump_ida_node(root->rnode, 0);
+}
 #endif
 
 /*
@@ -290,13 +358,11 @@ static void radix_tree_dump(struct radix_tree_root *root)
  * that the caller has pinned this thread of control to the current CPU.
  */
 static struct radix_tree_node *
-radix_tree_node_alloc(struct radix_tree_root *root,
-			struct radix_tree_node *parent,
+radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent,
 			unsigned int shift, unsigned int offset,
 			unsigned int count, unsigned int exceptional)
 {
 	struct radix_tree_node *ret = NULL;
-	gfp_t gfp_mask = root_gfp_mask(root);
 
 	/*
 	 * Preload code isn't irq safe and it doesn't make sense to use
@@ -533,7 +599,7 @@ static unsigned radix_tree_load_root(const struct radix_tree_root *root,
 /*
  *	Extend a radix tree so it can store key @index.
  */
-static int radix_tree_extend(struct radix_tree_root *root,
+static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp,
 				unsigned long index, unsigned int shift)
 {
 	struct radix_tree_node *slot;
@@ -546,19 +612,27 @@ static int radix_tree_extend(struct radix_tree_root *root,
 		maxshift += RADIX_TREE_MAP_SHIFT;
 
 	slot = root->rnode;
-	if (!slot)
+	if (!slot && (!is_idr(root) || root_tag_get(root, IDR_FREE)))
 		goto out;
 
 	do {
-		struct radix_tree_node *node = radix_tree_node_alloc(root,
-							NULL, shift, 0, 1, 0);
+		struct radix_tree_node *node = radix_tree_node_alloc(gfp, NULL,
+								shift, 0, 1, 0);
 		if (!node)
 			return -ENOMEM;
 
-		/* Propagate the aggregated tag info into the new root */
-		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
-			if (root_tag_get(root, tag))
-				tag_set(node, tag, 0);
+		if (is_idr(root)) {
+			all_tag_set(node, IDR_FREE);
+			if (!root_tag_get(root, IDR_FREE)) {
+				tag_clear(node, IDR_FREE, 0);
+				root_tag_set(root, IDR_FREE);
+			}
+		} else {
+			/* Propagate the aggregated tag info to the new child */
+			for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
+				if (root_tag_get(root, tag))
+					tag_set(node, tag, 0);
+			}
 		}
 
 		BUG_ON(shift > BITS_PER_LONG);
@@ -619,6 +693,8 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root,
 		 * one (root->rnode) as far as dependent read barriers go.
 		 */
 		root->rnode = child;
+		if (is_idr(root) && !tag_get(node, IDR_FREE, 0))
+			root_tag_clear(root, IDR_FREE);
 
 		/*
 		 * We have a dilemma here. The node's slot[0] must not be
@@ -674,7 +750,12 @@ static bool delete_node(struct radix_tree_root *root,
 			parent->slots[node->offset] = NULL;
 			parent->count--;
 		} else {
-			root_tag_clear_all(root);
+			/*
+			 * Shouldn't the tags already have all been cleared
+			 * by the caller?
+			 */
+			if (!is_idr(root))
+				root_tag_clear_all(root);
 			root->rnode = NULL;
 		}
 
@@ -714,6 +795,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
 	unsigned long maxindex;
 	unsigned int shift, offset = 0;
 	unsigned long max = index | ((1UL << order) - 1);
+	gfp_t gfp = root_gfp_mask(root);
 
 	shift = radix_tree_load_root(root, &child, &maxindex);
 
@@ -721,7 +803,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
 	if (order > 0 && max == ((1UL << order) - 1))
 		max++;
 	if (max > maxindex) {
-		int error = radix_tree_extend(root, max, shift);
+		int error = radix_tree_extend(root, gfp, max, shift);
 		if (error < 0)
 			return error;
 		shift = error;
@@ -732,7 +814,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
 		shift -= RADIX_TREE_MAP_SHIFT;
 		if (child == NULL) {
 			/* Have to add a child node.  */
-			child = radix_tree_node_alloc(root, node, shift,
+			child = radix_tree_node_alloc(gfp, node, shift,
 							offset, 0, 0);
 			if (!child)
 				return -ENOMEM;
@@ -755,7 +837,6 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
 	return 0;
 }
 
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
 /*
  * Free any nodes below this node.  The tree is presumed to not need
  * shrinking, and any user data in the tree is presumed to not need a
@@ -791,6 +872,7 @@ static void radix_tree_free_nodes(struct radix_tree_node *node)
 	}
 }
 
+#ifdef CONFIG_RADIX_TREE_MULTIORDER
 static inline int insert_entries(struct radix_tree_node *node, void **slot,
 				void *item, unsigned order, bool replace)
 {
@@ -996,69 +1078,70 @@ void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index)
 }
 EXPORT_SYMBOL(radix_tree_lookup);
 
-static inline int slot_count(struct radix_tree_node *node,
-						void **slot)
+static inline void replace_sibling_entries(struct radix_tree_node *node,
+				void **slot, int count, int exceptional)
 {
-	int n = 1;
 #ifdef CONFIG_RADIX_TREE_MULTIORDER
 	void *ptr = node_to_entry(slot);
-	unsigned offset = get_slot_offset(node, slot);
-	int i;
+	unsigned offset = get_slot_offset(node, slot) + 1;
 
-	for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) {
-		if (node->slots[offset + i] != ptr)
+	while (offset < RADIX_TREE_MAP_SIZE) {
+		if (node->slots[offset] != ptr)
 			break;
-		n++;
+		if (count < 0) {
+			node->slots[offset] = NULL;
+			node->count--;
+		}
+		node->exceptional += exceptional;
+		offset++;
 	}
 #endif
-	return n;
 }
 
-static void replace_slot(struct radix_tree_root *root,
-			 struct radix_tree_node *node,
-			 void **slot, void *item,
-			 bool warn_typeswitch)
+static void replace_slot(void **slot, void *item, struct radix_tree_node *node,
+				int count, int exceptional)
 {
-	void *old = rcu_dereference_raw(*slot);
-	int count, exceptional;
-
-	WARN_ON_ONCE(radix_tree_is_internal_node(item));
-
-	count = !!item - !!old;
-	exceptional = !!radix_tree_exceptional_entry(item) -
-		      !!radix_tree_exceptional_entry(old);
-
-	WARN_ON_ONCE(warn_typeswitch && (count || exceptional));
+	if (WARN_ON_ONCE(radix_tree_is_internal_node(item)))
+		return;
 
-	if (node) {
+	if (node && (count || exceptional)) {
 		node->count += count;
-		if (exceptional) {
-			exceptional *= slot_count(node, slot);
-			node->exceptional += exceptional;
-		}
+		node->exceptional += exceptional;
+		replace_sibling_entries(node, slot, count, exceptional);
 	}
 
 	rcu_assign_pointer(*slot, item);
 }
 
-static inline void delete_sibling_entries(struct radix_tree_node *node,
-						void **slot)
+static bool node_tag_get(const struct radix_tree_root *root,
+				const struct radix_tree_node *node,
+				unsigned int tag, unsigned int offset)
 {
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
-	bool exceptional = radix_tree_exceptional_entry(*slot);
-	void *ptr = node_to_entry(slot);
-	unsigned offset = get_slot_offset(node, slot);
-	int i;
+	if (node)
+		return tag_get(node, tag, offset);
+	return root_tag_get(root, tag);
+}
 
-	for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) {
-		if (node->slots[offset + i] != ptr)
-			break;
-		node->slots[offset + i] = NULL;
-		node->count--;
-		if (exceptional)
-			node->exceptional--;
+/*
+ * IDR users want to be able to store NULL in the tree, so if the slot isn't
+ * free, don't adjust the count, even if it's transitioning between NULL and
+ * non-NULL.  For the IDA, we mark slots as being IDR_FREE while they still
+ * have empty bits, but it only stores NULL in slots when they're being
+ * deleted.
+ */
+static int calculate_count(struct radix_tree_root *root,
+				struct radix_tree_node *node, void **slot,
+				void *item, void *old)
+{
+	if (is_idr(root)) {
+		unsigned offset = get_slot_offset(node, slot);
+		bool free = node_tag_get(root, node, IDR_FREE, offset);
+		if (!free)
+			return 0;
+		if (!old)
+			return 1;
 	}
-#endif
+	return !!item - !!old;
 }
 
 /**
@@ -1078,15 +1161,19 @@ void __radix_tree_replace(struct radix_tree_root *root,
 			  void **slot, void *item,
 			  radix_tree_update_node_t update_node, void *private)
 {
-	if (!item)
-		delete_sibling_entries(node, slot);
+	void *old = rcu_dereference_raw(*slot);
+	int exceptional = !!radix_tree_exceptional_entry(item) -
+				!!radix_tree_exceptional_entry(old);
+	int count = calculate_count(root, node, slot, item, old);
+
 	/*
 	 * This function supports replacing exceptional entries and
 	 * deleting entries, but that needs accounting against the
 	 * node unless the slot is root->rnode.
 	 */
-	replace_slot(root, node, slot, item,
-		     !node && slot != (void **)&root->rnode);
+	WARN_ON_ONCE(!node && (slot != (void **)&root->rnode) &&
+			(count || exceptional));
+	replace_slot(slot, item, node, count, exceptional);
 
 	if (!node)
 		return;
@@ -1116,7 +1203,7 @@ void __radix_tree_replace(struct radix_tree_root *root,
 void radix_tree_replace_slot(struct radix_tree_root *root,
 			     void **slot, void *item)
 {
-	replace_slot(root, NULL, slot, item, true);
+	__radix_tree_replace(root, NULL, slot, item, NULL, NULL);
 }
 
 /**
@@ -1191,6 +1278,7 @@ int radix_tree_split(struct radix_tree_root *root, unsigned long index,
 	void **slot;
 	unsigned int offset, end;
 	unsigned n, tag, tags = 0;
+	gfp_t gfp = root_gfp_mask(root);
 
 	if (!__radix_tree_lookup(root, index, &parent, &slot))
 		return -ENOENT;
@@ -1228,7 +1316,7 @@ int radix_tree_split(struct radix_tree_root *root, unsigned long index,
 
 	for (;;) {
 		if (node->shift > order) {
-			child = radix_tree_node_alloc(root, node,
+			child = radix_tree_node_alloc(gfp, node,
 					node->shift - RADIX_TREE_MAP_SHIFT,
 					offset, 0, 0);
 			if (!child)
@@ -1444,8 +1532,6 @@ int radix_tree_tag_get(const struct radix_tree_root *root,
 	radix_tree_load_root(root, &node, &maxindex);
 	if (index > maxindex)
 		return 0;
-	if (node == NULL)
-		return 0;
 
 	while (radix_tree_is_internal_node(node)) {
 		unsigned offset;
@@ -1453,8 +1539,6 @@ int radix_tree_tag_get(const struct radix_tree_root *root,
 		parent = entry_to_node(node);
 		offset = radix_tree_descend(parent, &node, index);
 
-		if (!node)
-			return 0;
 		if (!tag_get(parent, tag, offset))
 			return 0;
 		if (node == RADIX_TREE_RETRY)
@@ -1481,6 +1565,11 @@ static void set_iter_tags(struct radix_tree_iter *iter,
 	unsigned tag_long = offset / BITS_PER_LONG;
 	unsigned tag_bit  = offset % BITS_PER_LONG;
 
+	if (!node) {
+		iter->tags = 1;
+		return;
+	}
+
 	iter->tags = node->tags[tag][tag_long] >> tag_bit;
 
 	/* This never happens if RADIX_TREE_TAG_LONGS == 1 */
@@ -1873,13 +1962,18 @@ void __radix_tree_delete_node(struct radix_tree_root *root,
 static bool __radix_tree_delete(struct radix_tree_root *root,
 				struct radix_tree_node *node, void **slot)
 {
+	void *old = rcu_dereference_raw(*slot);
+	int exceptional = radix_tree_exceptional_entry(old) ? -1 : 0;
 	unsigned offset = get_slot_offset(node, slot);
 	int tag;
 
-	for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
-		node_tag_clear(root, node, tag, offset);
+	if (is_idr(root))
+		node_tag_set(root, node, IDR_FREE, offset);
+	else
+		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
+			node_tag_clear(root, node, tag, offset);
 
-	replace_slot(root, node, slot, NULL, true);
+	replace_slot(slot, NULL, node, -1, exceptional);
 	return node && delete_node(root, node, NULL, NULL);
 }
 
@@ -1916,12 +2010,13 @@ void radix_tree_iter_delete(struct radix_tree_root *root,
 void *radix_tree_delete_item(struct radix_tree_root *root,
 			     unsigned long index, void *item)
 {
-	struct radix_tree_node *node;
+	struct radix_tree_node *node = NULL;
 	void **slot;
 	void *entry;
 
 	entry = __radix_tree_lookup(root, index, &node, &slot);
-	if (!entry)
+	if (!entry && (!is_idr(root) || node_tag_get(root, node, IDR_FREE,
+						get_slot_offset(node, slot))))
 		return NULL;
 
 	if (item && entry != item)
@@ -1957,8 +2052,7 @@ void radix_tree_clear_tags(struct radix_tree_root *root,
 		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
 			node_tag_clear(root, node, tag, offset);
 	} else {
-		/* Clear root node tags */
-		root->gfp_mask &= __GFP_BITS_MASK;
+		root_tag_clear_all(root);
 	}
 }
 
@@ -1973,6 +2067,111 @@ int radix_tree_tagged(const struct radix_tree_root *root, unsigned int tag)
 }
 EXPORT_SYMBOL(radix_tree_tagged);
 
+/**
+ * idr_preload - preload for idr_alloc()
+ * @gfp_mask: allocation mask to use for preloading
+ *
+ * Preallocate memory to use for the next call to idr_alloc().  This function
+ * returns with preemption disabled.  It will be enabled by idr_preload_end().
+ */
+void idr_preload(gfp_t gfp_mask)
+{
+	__radix_tree_preload(gfp_mask, IDR_PRELOAD_SIZE);
+}
+EXPORT_SYMBOL(idr_preload);
+
+void **idr_get_free(struct radix_tree_root *root,
+			struct radix_tree_iter *iter, gfp_t gfp, int end)
+{
+	struct radix_tree_node *node = NULL, *child;
+	void **slot = (void **)&root->rnode;
+	unsigned long maxindex, start = iter->next_index;
+	unsigned long max = end > 0 ? end - 1 : INT_MAX;
+	unsigned int shift, offset = 0;
+
+ grow:
+	shift = radix_tree_load_root(root, &child, &maxindex);
+	if (!radix_tree_tagged(root, IDR_FREE))
+		start = max(start, maxindex + 1);
+	if (start > max)
+		return ERR_PTR(-ENOSPC);
+
+	if (start > maxindex) {
+		int error = radix_tree_extend(root, gfp, start, shift);
+		if (error < 0)
+			return ERR_PTR(error);
+		shift = error;
+		child = rcu_dereference_raw(root->rnode);
+	}
+
+	while (shift) {
+		shift -= RADIX_TREE_MAP_SHIFT;
+		if (child == NULL) {
+			/* Have to add a child node.  */
+			child = radix_tree_node_alloc(gfp, node, shift, offset,
+							0, 0);
+			if (!child)
+				return ERR_PTR(-ENOMEM);
+			all_tag_set(child, IDR_FREE);
+			rcu_assign_pointer(*slot, node_to_entry(child));
+			if (node)
+				node->count++;
+		} else if (!radix_tree_is_internal_node(child))
+			break;
+
+		node = entry_to_node(child);
+		offset = radix_tree_descend(node, &child, start);
+		if (!tag_get(node, IDR_FREE, offset)) {
+			offset = radix_tree_find_next_bit(node, IDR_FREE,
+							offset + 1);
+			start = next_index(start, node, offset);
+			if (start > max)
+				return ERR_PTR(-ENOSPC);
+			while (offset == RADIX_TREE_MAP_SIZE) {
+				offset = node->offset + 1;
+				node = node->parent;
+				if (!node)
+					goto grow;
+				shift = node->shift;
+			}
+			child = rcu_dereference_raw(node->slots[offset]);
+		}
+		slot = &node->slots[offset];
+	}
+
+	iter->index = start;
+	if (node)
+		iter->next_index = 1 + min(max, (start | node_maxindex(node)));
+	else
+		iter->next_index = 1;
+	iter->node = node;
+	__set_iter_shift(iter, shift);
+	set_iter_tags(iter, node, offset, IDR_FREE);
+
+	return slot;
+}
+
+/**
+ * idr_destroy - release all internal memory from an IDR
+ * @idr: idr handle
+ *
+ * After this function is called, the IDR is empty, and may be reused or
+ * the data structure containing it may be freed.
+ *
+ * A typical clean-up sequence for objects stored in an idr tree will use
+ * idr_for_each() to free all objects, if necessary, then idr_destroy() to
+ * free the memory used to keep track of those objects.
+ */
+void idr_destroy(struct idr *idr)
+{
+	struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.rnode);
+	if (radix_tree_is_internal_node(node))
+		radix_tree_free_nodes(node);
+	idr->idr_rt.rnode = NULL;
+	root_tag_set(&idr->idr_rt, IDR_FREE);
+}
+EXPORT_SYMBOL(idr_destroy);
+
 static void
 radix_tree_node_ctor(void *arg)
 {
diff --git a/tools/testing/radix-tree/.gitignore b/tools/testing/radix-tree/.gitignore
index 11d888ca6a92..3b5534b643f0 100644
--- a/tools/testing/radix-tree/.gitignore
+++ b/tools/testing/radix-tree/.gitignore
@@ -1,2 +1,3 @@
 main
 radix-tree.c
+idr.c
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index 5274e88cd293..3597a3a9f269 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -2,8 +2,8 @@
 CFLAGS += -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE
 LDFLAGS += -lpthread -lurcu
 TARGETS = main
-OFILES = main.o radix-tree.o linux.o test.o tag_check.o find_bit.o \
-	 regression1.o regression2.o regression3.o multiorder.o \
+OFILES = main.o radix-tree.o idr.o linux.o test.o tag_check.o find_bit.o \
+	 regression1.o regression2.o regression3.o multiorder.o idr-test.o \
 	 iteration_check.o benchmark.o
 
 ifdef BENCHMARK
@@ -23,7 +23,11 @@ vpath %.c ../../lib
 $(OFILES): *.h */*.h \
 	../../include/linux/*.h \
 	../../include/asm/*.h \
-	../../../include/linux/radix-tree.h
+	../../../include/linux/radix-tree.h \
+	../../../include/linux/idr.h
 
 radix-tree.c: ../../../lib/radix-tree.c
 	sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@
+
+idr.c: ../../../lib/idr.c
+	sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@
diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
new file mode 100644
index 000000000000..4dffad9284e0
--- /dev/null
+++ b/tools/testing/radix-tree/idr-test.c
@@ -0,0 +1,342 @@
+/*
+ * idr-test.c: Test the IDR API
+ * Copyright (c) 2016 Matthew Wilcox <willy@infradead.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#include <linux/idr.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+
+#include "test.h"
+
+#define DUMMY_PTR	((void *)0x12)
+
+int item_idr_free(int id, void *p, void *data)
+{
+	struct item *item = p;
+	assert(item->index == id);
+	free(p);
+
+	return 0;
+}
+
+void item_idr_remove(struct idr *idr, int id)
+{
+	struct item *item = idr_find(idr, id);
+	assert(item->index == id);
+	idr_remove(idr, id);
+	free(item);
+}
+
+void idr_alloc_test(void)
+{
+	unsigned long i;
+	DEFINE_IDR(idr);
+
+	assert(idr_alloc_cyclic(&idr, DUMMY_PTR, 0, 0x4000, GFP_KERNEL) == 0);
+	assert(idr_alloc_cyclic(&idr, DUMMY_PTR, 0x3ffd, 0x4000, GFP_KERNEL) == 0x3ffd);
+	idr_remove(&idr, 0x3ffd);
+	idr_remove(&idr, 0);
+
+	for (i = 0x3ffe; i < 0x4003; i++) {
+		int id;
+		struct item *item;
+
+		if (i < 0x4000)
+			item = item_create(i, 0);
+		else
+			item = item_create(i - 0x3fff, 0);
+
+		id = idr_alloc_cyclic(&idr, item, 1, 0x4000, GFP_KERNEL);
+		assert(id == item->index);
+	}
+
+	idr_for_each(&idr, item_idr_free, &idr);
+	idr_destroy(&idr);
+}
+
+void idr_replace_test(void)
+{
+	DEFINE_IDR(idr);
+
+	idr_alloc(&idr, (void *)-1, 10, 11, GFP_KERNEL);
+	idr_replace(&idr, &idr, 10);
+
+	idr_destroy(&idr);
+}
+
+/*
+ * Unlike the radix tree, you can put a NULL pointer -- with care -- into
+ * the IDR.  Some interfaces, like idr_find() do not distinguish between
+ * "present, value is NULL" and "not present", but that's exactly what some
+ * users want.
+ */
+void idr_null_test(void)
+{
+	int i;
+	DEFINE_IDR(idr);
+
+	assert(idr_is_empty(&idr));
+
+	assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == 0);
+	assert(!idr_is_empty(&idr));
+	idr_remove(&idr, 0);
+	assert(idr_is_empty(&idr));
+
+	assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == 0);
+	assert(!idr_is_empty(&idr));
+	idr_destroy(&idr);
+	assert(idr_is_empty(&idr));
+
+	for (i = 0; i < 10; i++) {
+		assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == i);
+	}
+
+	assert(idr_replace(&idr, DUMMY_PTR, 3) == NULL);
+	assert(idr_replace(&idr, DUMMY_PTR, 4) == NULL);
+	assert(idr_replace(&idr, NULL, 4) == DUMMY_PTR);
+	assert(idr_replace(&idr, DUMMY_PTR, 11) == ERR_PTR(-ENOENT));
+	idr_remove(&idr, 5);
+	assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == 5);
+	idr_remove(&idr, 5);
+
+	for (i = 0; i < 9; i++) {
+		idr_remove(&idr, i);
+		assert(!idr_is_empty(&idr));
+	}
+	idr_remove(&idr, 8);
+	assert(!idr_is_empty(&idr));
+	idr_remove(&idr, 9);
+	assert(idr_is_empty(&idr));
+
+	assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == 0);
+	assert(idr_replace(&idr, DUMMY_PTR, 3) == ERR_PTR(-ENOENT));
+	assert(idr_replace(&idr, DUMMY_PTR, 0) == NULL);
+	assert(idr_replace(&idr, NULL, 0) == DUMMY_PTR);
+
+	idr_destroy(&idr);
+	assert(idr_is_empty(&idr));
+
+	for (i = 1; i < 10; i++) {
+		assert(idr_alloc(&idr, NULL, 1, 0, GFP_KERNEL) == i);
+	}
+
+	idr_destroy(&idr);
+	assert(idr_is_empty(&idr));
+}
+
+void idr_nowait_test(void)
+{
+	unsigned int i;
+	DEFINE_IDR(idr);
+
+	idr_preload(GFP_KERNEL);
+
+	for (i = 0; i < 3; i++) {
+		struct item *item = item_create(i, 0);
+		assert(idr_alloc(&idr, item, i, i + 1, GFP_NOWAIT) == i);
+	}
+
+	idr_preload_end();
+
+	idr_for_each(&idr, item_idr_free, &idr);
+	idr_destroy(&idr);
+}
+
+void idr_checks(void)
+{
+	unsigned long i;
+	DEFINE_IDR(idr);
+
+	for (i = 0; i < 10000; i++) {
+		struct item *item = item_create(i, 0);
+		assert(idr_alloc(&idr, item, 0, 20000, GFP_KERNEL) == i);
+	}
+
+	assert(idr_alloc(&idr, DUMMY_PTR, 5, 30, GFP_KERNEL) < 0);
+
+	for (i = 0; i < 5000; i++)
+		item_idr_remove(&idr, i);
+
+	idr_remove(&idr, 3);
+
+	idr_for_each(&idr, item_idr_free, &idr);
+	idr_destroy(&idr);
+
+	assert(idr_is_empty(&idr));
+
+	idr_remove(&idr, 3);
+	idr_remove(&idr, 0);
+
+	for (i = INT_MAX - 3UL; i < INT_MAX + 1UL; i++) {
+		struct item *item = item_create(i, 0);
+		assert(idr_alloc(&idr, item, i, i + 10, GFP_KERNEL) == i);
+	}
+	assert(idr_alloc(&idr, DUMMY_PTR, i - 2, i, GFP_KERNEL) == -ENOSPC);
+
+	idr_for_each(&idr, item_idr_free, &idr);
+	idr_destroy(&idr);
+	idr_destroy(&idr);
+
+	assert(idr_is_empty(&idr));
+
+	for (i = 1; i < 10000; i++) {
+		struct item *item = item_create(i, 0);
+		assert(idr_alloc(&idr, item, 1, 20000, GFP_KERNEL) == i);
+	}
+
+	idr_for_each(&idr, item_idr_free, &idr);
+	idr_destroy(&idr);
+
+	idr_replace_test();
+	idr_alloc_test();
+	idr_null_test();
+	idr_nowait_test();
+}
+
+/*
+ * Check that we get the correct error when we run out of memory doing
+ * allocations.  To ensure we run out of memory, just "forget" to preload.
+ * The first test is for not having a bitmap available, and the second test
+ * is for not being able to allocate a level of the radix tree.
+ */
+void ida_check_nomem(void)
+{
+	DEFINE_IDA(ida);
+	int id, err;
+
+	err = ida_get_new(&ida, &id);
+	assert(err == -EAGAIN);
+	err = ida_get_new_above(&ida, 1UL << 30, &id);
+	assert(err == -EAGAIN);
+}
+
+/*
+ * Check what happens when we fill a leaf and then delete it.  This may
+ * discover mishandling of IDR_FREE.
+ */
+void ida_check_leaf(void)
+{
+	DEFINE_IDA(ida);
+	int id;
+	unsigned long i;
+
+	for (i = 0; i < IDA_BITMAP_BITS; i++) {
+		assert(ida_pre_get(&ida, GFP_KERNEL));
+		assert(!ida_get_new(&ida, &id));
+		assert(id == i);
+	}
+
+	ida_destroy(&ida);
+	assert(ida_is_empty(&ida));
+
+	assert(ida_pre_get(&ida, GFP_KERNEL));
+	assert(!ida_get_new(&ida, &id));
+	assert(id == 0);
+	ida_destroy(&ida);
+	assert(ida_is_empty(&ida));
+}
+
+/*
+ * Check allocations up to and slightly above the maximum allowed (2^31-1) ID.
+ * Allocating up to 2^31-1 should succeed, and then allocating the next one
+ * should fail.
+ */
+void ida_check_max(void)
+{
+	DEFINE_IDA(ida);
+	int id, err;
+	unsigned long i, j;
+
+	for (j = 1; j < 65537; j *= 2) {
+		unsigned long base = (1UL << 31) - j;
+		for (i = 0; i < j; i++) {
+			assert(ida_pre_get(&ida, GFP_KERNEL));
+			assert(!ida_get_new_above(&ida, base, &id));
+			assert(id == base + i);
+		}
+		assert(ida_pre_get(&ida, GFP_KERNEL));
+		err = ida_get_new_above(&ida, base, &id);
+		assert(err == -ENOSPC);
+		ida_destroy(&ida);
+		assert(ida_is_empty(&ida));
+		rcu_barrier();
+	}
+}
+
+void ida_checks(void)
+{
+	DEFINE_IDA(ida);
+	int id;
+	unsigned long i;
+
+	radix_tree_cpu_dead(1);
+	ida_check_nomem();
+
+	for (i = 0; i < 10000; i++) {
+		assert(ida_pre_get(&ida, GFP_KERNEL));
+		assert(!ida_get_new(&ida, &id));
+		assert(id == i);
+	}
+
+	ida_remove(&ida, 20);
+	ida_remove(&ida, 21);
+	for (i = 0; i < 3; i++) {
+		assert(ida_pre_get(&ida, GFP_KERNEL));
+		assert(!ida_get_new(&ida, &id));
+		if (i == 2)
+			assert(id == 10000);
+	}
+
+	for (i = 0; i < 5000; i++)
+		ida_remove(&ida, i);
+
+	assert(ida_pre_get(&ida, GFP_KERNEL));
+	assert(!ida_get_new_above(&ida, 5000, &id));
+	assert(id == 10001);
+
+	ida_destroy(&ida);
+
+	assert(ida_is_empty(&ida));
+
+	assert(ida_pre_get(&ida, GFP_KERNEL));
+	assert(!ida_get_new_above(&ida, 1, &id));
+	assert(id == 1);
+
+	ida_remove(&ida, id);
+	assert(ida_is_empty(&ida));
+	ida_destroy(&ida);
+	assert(ida_is_empty(&ida));
+
+	assert(ida_pre_get(&ida, GFP_KERNEL));
+	assert(!ida_get_new_above(&ida, 1, &id));
+	ida_destroy(&ida);
+	assert(ida_is_empty(&ida));
+
+	assert(ida_pre_get(&ida, GFP_KERNEL));
+	assert(!ida_get_new_above(&ida, 1, &id));
+	assert(id == 1);
+	assert(ida_pre_get(&ida, GFP_KERNEL));
+	assert(!ida_get_new_above(&ida, 1025, &id));
+	assert(id == 1025);
+	assert(ida_pre_get(&ida, GFP_KERNEL));
+	assert(!ida_get_new_above(&ida, 10000, &id));
+	assert(id == 10000);
+	ida_remove(&ida, 1025);
+	ida_destroy(&ida);
+	assert(ida_is_empty(&ida));
+
+	ida_check_leaf();
+	ida_check_max();
+
+	radix_tree_cpu_dead(1);
+}
diff --git a/tools/testing/radix-tree/linux/gfp.h b/tools/testing/radix-tree/linux/gfp.h
index 701209816a6b..39a0dcb9475a 100644
--- a/tools/testing/radix-tree/linux/gfp.h
+++ b/tools/testing/radix-tree/linux/gfp.h
@@ -15,10 +15,12 @@
 #define __GFP_DIRECT_RECLAIM	0x400000u
 #define __GFP_KSWAPD_RECLAIM	0x2000000u
 
-#define __GFP_RECLAIM		(__GFP_DIRECT_RECLAIM|__GFP_KSWAPD_RECLAIM)
+#define __GFP_RECLAIM	(__GFP_DIRECT_RECLAIM|__GFP_KSWAPD_RECLAIM)
+
+#define GFP_ATOMIC	(__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
+#define GFP_KERNEL	(__GFP_RECLAIM | __GFP_IO | __GFP_FS)
+#define GFP_NOWAIT	(__GFP_KSWAPD_RECLAIM)
 
-#define GFP_ATOMIC		(__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
-#define GFP_KERNEL		(__GFP_RECLAIM | __GFP_IO | __GFP_FS)
 
 static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
 {
diff --git a/tools/testing/radix-tree/linux/idr.h b/tools/testing/radix-tree/linux/idr.h
new file mode 100644
index 000000000000..4e342f2e37cf
--- /dev/null
+++ b/tools/testing/radix-tree/linux/idr.h
@@ -0,0 +1 @@
+#include "../../../../include/linux/idr.h"
diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h
index dd1d9aefb14f..63fce553781a 100644
--- a/tools/testing/radix-tree/linux/kernel.h
+++ b/tools/testing/radix-tree/linux/kernel.h
@@ -20,6 +20,7 @@
 
 #define printk printf
 #define pr_debug printk
+#define pr_cont printk
 
 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
 
diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c
index f7e9801a6754..ddd90a11db3f 100644
--- a/tools/testing/radix-tree/main.c
+++ b/tools/testing/radix-tree/main.c
@@ -3,6 +3,7 @@
 #include <unistd.h>
 #include <time.h>
 #include <assert.h>
+#include <limits.h>
 
 #include <linux/slab.h>
 #include <linux/radix-tree.h>
@@ -314,6 +315,11 @@ static void single_thread_tests(bool long_run)
 	rcu_barrier();
 	printf("after dynamic_height_check: %d allocated, preempt %d\n",
 		nr_allocated, preempt_count);
+	idr_checks();
+	ida_checks();
+	rcu_barrier();
+	printf("after idr_checks: %d allocated, preempt %d\n",
+		nr_allocated, preempt_count);
 	big_gang_check(long_run);
 	rcu_barrier();
 	printf("after big_gang_check: %d allocated, preempt %d\n",
diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h
index 056a23b56467..b30e11d9d271 100644
--- a/tools/testing/radix-tree/test.h
+++ b/tools/testing/radix-tree/test.h
@@ -34,6 +34,8 @@ void tag_check(void);
 void multiorder_checks(void);
 void iteration_test(unsigned order, unsigned duration);
 void benchmark(void);
+void idr_checks(void);
+void ida_checks(void);
 
 struct item *
 item_tag_set(struct radix_tree_root *root, unsigned long index, int tag);
-- 
cgit v1.2.3


From 7ad3d4d85c7af9632055a6ac0aa15b6b6a321c6b Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Fri, 16 Dec 2016 11:55:56 -0500
Subject: ida: Move ida_bitmap to a percpu variable

When we preload the IDA, we allocate an IDA bitmap.  Instead of storing
that preallocated bitmap in the IDA, we store it in a percpu variable.
Generally there are more IDAs in the system than CPUs, so this cuts down
on the number of preallocated bitmaps that are unused, and about half
of the IDA users did not call ida_destroy() so they were leaking IDA
bitmaps.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 include/linux/idr.h                     |  5 ++--
 lib/idr.c                               | 39 ++--------------------------
 lib/radix-tree.c                        | 45 ++++++++++++++++++++++++++++++---
 tools/testing/radix-tree/linux/kernel.h |  2 --
 tools/testing/radix-tree/linux/percpu.h |  5 +++-
 5 files changed, 51 insertions(+), 45 deletions(-)

(limited to 'tools/testing')

diff --git a/include/linux/idr.h b/include/linux/idr.h
index f58c0a3addc3..2027c7aba50d 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -14,6 +14,7 @@
 
 #include <linux/radix-tree.h>
 #include <linux/gfp.h>
+#include <linux/percpu.h>
 
 struct idr {
 	struct radix_tree_root	idr_rt;
@@ -171,9 +172,10 @@ struct ida_bitmap {
 	unsigned long		bitmap[IDA_BITMAP_LONGS];
 };
 
+DECLARE_PER_CPU(struct ida_bitmap *, ida_bitmap);
+
 struct ida {
 	struct radix_tree_root	ida_rt;
-	struct ida_bitmap	*free_bitmap;
 };
 
 #define IDA_INIT	{						\
@@ -193,7 +195,6 @@ void ida_simple_remove(struct ida *ida, unsigned int id);
 static inline void ida_init(struct ida *ida)
 {
 	INIT_RADIX_TREE(&ida->ida_rt, IDR_RT_MARKER | GFP_NOWAIT);
-	ida->free_bitmap = NULL;
 }
 
 /**
diff --git a/lib/idr.c b/lib/idr.c
index b87056e2cc4c..2abd7769c430 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -4,6 +4,7 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 
+DEFINE_PER_CPU(struct ida_bitmap *, ida_bitmap);
 static DEFINE_SPINLOCK(simple_ida_lock);
 
 /**
@@ -193,38 +194,6 @@ EXPORT_SYMBOL(idr_replace);
  * limitation, it should be quite straightforward to raise the maximum.
  */
 
-/**
- * ida_pre_get - reserve resources for ida allocation
- * @ida: ida handle
- * @gfp: memory allocation flags
- *
- * This function should be called before calling ida_get_new_above().  If it
- * is unable to allocate memory, it will return %0.  On success, it returns %1.
- */
-int ida_pre_get(struct ida *ida, gfp_t gfp)
-{
-	struct ida_bitmap *bitmap;
-
-	/*
-	 * This looks weird, but the IDA API has no preload_end() equivalent.
-	 * Instead, ida_get_new() can return -EAGAIN, prompting the caller
-	 * to return to the ida_pre_get() step.
-	 */
-	idr_preload(gfp);
-	idr_preload_end();
-
-	if (!ida->free_bitmap) {
-		bitmap = kmalloc(sizeof(struct ida_bitmap), gfp);
-		if (!bitmap)
-			return 0;
-		bitmap = xchg(&ida->free_bitmap, bitmap);
-		kfree(bitmap);
-	}
-
-	return 1;
-}
-EXPORT_SYMBOL(ida_pre_get);
-
 #define IDA_MAX (0x80000000U / IDA_BITMAP_BITS)
 
 /**
@@ -292,10 +261,9 @@ int ida_get_new_above(struct ida *ida, int start, int *id)
 			new += bit;
 			if (new < 0)
 				return -ENOSPC;
-			bitmap = ida->free_bitmap;
+			bitmap = this_cpu_xchg(ida_bitmap, NULL);
 			if (!bitmap)
 				return -EAGAIN;
-			ida->free_bitmap = NULL;
 			memset(bitmap, 0, sizeof(*bitmap));
 			__set_bit(bit, bitmap->bitmap);
 			radix_tree_iter_replace(root, &iter, slot, bitmap);
@@ -361,9 +329,6 @@ void ida_destroy(struct ida *ida)
 		kfree(bitmap);
 		radix_tree_iter_delete(&ida->ida_rt, &iter, slot);
 	}
-
-	kfree(ida->free_bitmap);
-	ida->free_bitmap = NULL;
 }
 EXPORT_SYMBOL(ida_destroy);
 
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index eaea14b8f2ca..7b9f8515033e 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -69,6 +69,14 @@ static struct kmem_cache *radix_tree_node_cachep;
 						RADIX_TREE_MAP_SHIFT))
 #define IDR_PRELOAD_SIZE	(IDR_MAX_PATH * 2 - 1)
 
+/*
+ * The IDA is even shorter since it uses a bitmap at the last level.
+ */
+#define IDA_INDEX_BITS		(8 * sizeof(int) - 1 - ilog2(IDA_BITMAP_BITS))
+#define IDA_MAX_PATH		(DIV_ROUND_UP(IDA_INDEX_BITS, \
+						RADIX_TREE_MAP_SHIFT))
+#define IDA_PRELOAD_SIZE	(IDA_MAX_PATH * 2 - 1)
+
 /*
  * Per-cpu pool of preloaded nodes
  */
@@ -346,9 +354,8 @@ static void dump_ida_node(void *entry, unsigned long index)
 static void ida_dump(struct ida *ida)
 {
 	struct radix_tree_root *root = &ida->ida_rt;
-	pr_debug("ida: %p %p free %d bitmap %p\n", ida, root->rnode,
-				root->gfp_mask >> ROOT_TAG_SHIFT,
-				ida->free_bitmap);
+	pr_debug("ida: %p node %p free %d\n", ida, root->rnode,
+				root->gfp_mask >> ROOT_TAG_SHIFT);
 	dump_ida_node(root->rnode, 0);
 }
 #endif
@@ -2080,6 +2087,36 @@ void idr_preload(gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(idr_preload);
 
+/**
+ * ida_pre_get - reserve resources for ida allocation
+ * @ida: ida handle
+ * @gfp: memory allocation flags
+ *
+ * This function should be called before calling ida_get_new_above().  If it
+ * is unable to allocate memory, it will return %0.  On success, it returns %1.
+ */
+int ida_pre_get(struct ida *ida, gfp_t gfp)
+{
+	__radix_tree_preload(gfp, IDA_PRELOAD_SIZE);
+	/*
+	 * The IDA API has no preload_end() equivalent.  Instead,
+	 * ida_get_new() can return -EAGAIN, prompting the caller
+	 * to return to the ida_pre_get() step.
+	 */
+	preempt_enable();
+
+	if (!this_cpu_read(ida_bitmap)) {
+		struct ida_bitmap *bitmap = kmalloc(sizeof(*bitmap), gfp);
+		if (!bitmap)
+			return 0;
+		bitmap = this_cpu_cmpxchg(ida_bitmap, NULL, bitmap);
+		kfree(bitmap);
+	}
+
+	return 1;
+}
+EXPORT_SYMBOL(ida_pre_get);
+
 void **idr_get_free(struct radix_tree_root *root,
 			struct radix_tree_iter *iter, gfp_t gfp, int end)
 {
@@ -2219,6 +2256,8 @@ static int radix_tree_cpu_dead(unsigned int cpu)
 		kmem_cache_free(radix_tree_node_cachep, node);
 		rtp->nr--;
 	}
+	kfree(per_cpu(ida_bitmap, cpu));
+	per_cpu(ida_bitmap, cpu) = NULL;
 	return 0;
 }
 
diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h
index 63fce553781a..677b8c0f60f9 100644
--- a/tools/testing/radix-tree/linux/kernel.h
+++ b/tools/testing/radix-tree/linux/kernel.h
@@ -24,6 +24,4 @@
 
 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
 
-#define xchg(ptr, x)	uatomic_xchg(ptr, x)
-
 #endif /* _KERNEL_H */
diff --git a/tools/testing/radix-tree/linux/percpu.h b/tools/testing/radix-tree/linux/percpu.h
index 5837f1d56f17..3ea01a1a88c2 100644
--- a/tools/testing/radix-tree/linux/percpu.h
+++ b/tools/testing/radix-tree/linux/percpu.h
@@ -1,7 +1,10 @@
-
+#define DECLARE_PER_CPU(type, val) extern type val
 #define DEFINE_PER_CPU(type, val) type val
 
 #define __get_cpu_var(var)	var
 #define this_cpu_ptr(var)	var
+#define this_cpu_read(var)	var
+#define this_cpu_xchg(var, val)		uatomic_xchg(&var, val)
+#define this_cpu_cmpxchg(var, old, new)	uatomic_cmpxchg(&var, old, new)
 #define per_cpu_ptr(ptr, cpu)   ({ (void)(cpu); (ptr); })
 #define per_cpu(var, cpu)	(*per_cpu_ptr(&(var), cpu))
-- 
cgit v1.2.3


From d37cacc5adace7f3e0824e1f559192ad7299d029 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Sat, 17 Dec 2016 08:18:17 -0500
Subject: ida: Use exceptional entries for small IDAs

We can use the root entry as a bitmap and save allocating a 128 byte
bitmap for an IDA that contains only a few entries (30 on a 32-bit
machine, 62 on a 64-bit machine).  This costs about 300 bytes of kernel
text on x86-64, so as long as 3 IDAs fall into this category, this
is a net win for memory consumption.

Thanks to Rasmus Villemoes for his work documenting the problem and
collecting statistics on IDAs.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 lib/idr.c                           | 87 +++++++++++++++++++++++++++++++---
 lib/radix-tree.c                    |  8 ++++
 tools/testing/radix-tree/idr-test.c | 93 ++++++++++++++++++++++++++++++++++++-
 3 files changed, 181 insertions(+), 7 deletions(-)

(limited to 'tools/testing')

diff --git a/lib/idr.c b/lib/idr.c
index 2abd7769c430..7d25e240bc5a 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -194,6 +194,39 @@ EXPORT_SYMBOL(idr_replace);
  * limitation, it should be quite straightforward to raise the maximum.
  */
 
+/*
+ * Developer's notes:
+ *
+ * The IDA uses the functionality provided by the IDR & radix tree to store
+ * bitmaps in each entry.  The IDR_FREE tag means there is at least one bit
+ * free, unlike the IDR where it means at least one entry is free.
+ *
+ * I considered telling the radix tree that each slot is an order-10 node
+ * and storing the bit numbers in the radix tree, but the radix tree can't
+ * allow a single multiorder entry at index 0, which would significantly
+ * increase memory consumption for the IDA.  So instead we divide the index
+ * by the number of bits in the leaf bitmap before doing a radix tree lookup.
+ *
+ * As an optimisation, if there are only a few low bits set in any given
+ * leaf, instead of allocating a 128-byte bitmap, we use the 'exceptional
+ * entry' functionality of the radix tree to store BITS_PER_LONG - 2 bits
+ * directly in the entry.  By being really tricksy, we could store
+ * BITS_PER_LONG - 1 bits, but there're diminishing returns after optimising
+ * for 0-3 allocated IDs.
+ *
+ * We allow the radix tree 'exceptional' count to get out of date.  Nothing
+ * in the IDA nor the radix tree code checks it.  If it becomes important
+ * to maintain an accurate exceptional count, switch the rcu_assign_pointer()
+ * calls to radix_tree_iter_replace() which will correct the exceptional
+ * count.
+ *
+ * The IDA always requires a lock to alloc/free.  If we add a 'test_bit'
+ * equivalent, it will still need locking.  Going to RCU lookup would require
+ * using RCU to free bitmaps, and that's not trivial without embedding an
+ * RCU head in the bitmap, which adds a 2-pointer overhead to each 128-byte
+ * bitmap, which is excessive.
+ */
+
 #define IDA_MAX (0x80000000U / IDA_BITMAP_BITS)
 
 /**
@@ -221,11 +254,12 @@ int ida_get_new_above(struct ida *ida, int start, int *id)
 	struct radix_tree_iter iter;
 	struct ida_bitmap *bitmap;
 	unsigned long index;
-	unsigned bit;
+	unsigned bit, ebit;
 	int new;
 
 	index = start / IDA_BITMAP_BITS;
 	bit = start % IDA_BITMAP_BITS;
+	ebit = bit + RADIX_TREE_EXCEPTIONAL_SHIFT;
 
 	slot = radix_tree_iter_init(&iter, index);
 	for (;;) {
@@ -240,10 +274,29 @@ int ida_get_new_above(struct ida *ida, int start, int *id)
 				return PTR_ERR(slot);
 			}
 		}
-		if (iter.index > index)
+		if (iter.index > index) {
 			bit = 0;
+			ebit = RADIX_TREE_EXCEPTIONAL_SHIFT;
+		}
 		new = iter.index * IDA_BITMAP_BITS;
 		bitmap = rcu_dereference_raw(*slot);
+		if (radix_tree_exception(bitmap)) {
+			unsigned long tmp = (unsigned long)bitmap;
+			ebit = find_next_zero_bit(&tmp, BITS_PER_LONG, ebit);
+			if (ebit < BITS_PER_LONG) {
+				tmp |= 1UL << ebit;
+				rcu_assign_pointer(*slot, (void *)tmp);
+				*id = new + ebit - RADIX_TREE_EXCEPTIONAL_SHIFT;
+				return 0;
+			}
+			bitmap = this_cpu_xchg(ida_bitmap, NULL);
+			if (!bitmap)
+				return -EAGAIN;
+			memset(bitmap, 0, sizeof(*bitmap));
+			bitmap->bitmap[0] = tmp >> RADIX_TREE_EXCEPTIONAL_SHIFT;
+			rcu_assign_pointer(*slot, bitmap);
+		}
+
 		if (bitmap) {
 			bit = find_next_zero_bit(bitmap->bitmap,
 							IDA_BITMAP_BITS, bit);
@@ -261,6 +314,14 @@ int ida_get_new_above(struct ida *ida, int start, int *id)
 			new += bit;
 			if (new < 0)
 				return -ENOSPC;
+			if (ebit < BITS_PER_LONG) {
+				bitmap = (void *)((1UL << ebit) |
+						RADIX_TREE_EXCEPTIONAL_ENTRY);
+				radix_tree_iter_replace(root, &iter, slot,
+						bitmap);
+				*id = new;
+				return 0;
+			}
 			bitmap = this_cpu_xchg(ida_bitmap, NULL);
 			if (!bitmap)
 				return -EAGAIN;
@@ -287,6 +348,7 @@ void ida_remove(struct ida *ida, int id)
 	unsigned long index = id / IDA_BITMAP_BITS;
 	unsigned offset = id % IDA_BITMAP_BITS;
 	struct ida_bitmap *bitmap;
+	unsigned long *btmp;
 	struct radix_tree_iter iter;
 	void **slot;
 
@@ -295,12 +357,24 @@ void ida_remove(struct ida *ida, int id)
 		goto err;
 
 	bitmap = rcu_dereference_raw(*slot);
-	if (!test_bit(offset, bitmap->bitmap))
+	if (radix_tree_exception(bitmap)) {
+		btmp = (unsigned long *)slot;
+		offset += RADIX_TREE_EXCEPTIONAL_SHIFT;
+		if (offset >= BITS_PER_LONG)
+			goto err;
+	} else {
+		btmp = bitmap->bitmap;
+	}
+	if (!test_bit(offset, btmp))
 		goto err;
 
-	__clear_bit(offset, bitmap->bitmap);
+	__clear_bit(offset, btmp);
 	radix_tree_iter_tag_set(&ida->ida_rt, &iter, IDR_FREE);
-	if (bitmap_empty(bitmap->bitmap, IDA_BITMAP_BITS)) {
+	if (radix_tree_exception(bitmap)) {
+		if (rcu_dereference_raw(*slot) ==
+					(void *)RADIX_TREE_EXCEPTIONAL_ENTRY)
+			radix_tree_iter_delete(&ida->ida_rt, &iter, slot);
+	} else if (bitmap_empty(btmp, IDA_BITMAP_BITS)) {
 		kfree(bitmap);
 		radix_tree_iter_delete(&ida->ida_rt, &iter, slot);
 	}
@@ -326,7 +400,8 @@ void ida_destroy(struct ida *ida)
 
 	radix_tree_for_each_slot(slot, &ida->ida_rt, &iter, 0) {
 		struct ida_bitmap *bitmap = rcu_dereference_raw(*slot);
-		kfree(bitmap);
+		if (!radix_tree_exception(bitmap))
+			kfree(bitmap);
 		radix_tree_iter_delete(&ida->ida_rt, &iter, slot);
 	}
 }
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 7b9f8515033e..14130ab197c0 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -338,6 +338,14 @@ static void dump_ida_node(void *entry, unsigned long index)
 		for (i = 0; i < RADIX_TREE_MAP_SIZE; i++)
 			dump_ida_node(node->slots[i],
 					index | (i << node->shift));
+	} else if (radix_tree_exceptional_entry(entry)) {
+		pr_debug("ida excp: %p offset %d indices %lu-%lu data %lx\n",
+				entry, (int)(index & RADIX_TREE_MAP_MASK),
+				index * IDA_BITMAP_BITS,
+				index * IDA_BITMAP_BITS + BITS_PER_LONG -
+					RADIX_TREE_EXCEPTIONAL_SHIFT,
+				(unsigned long)entry >>
+					RADIX_TREE_EXCEPTIONAL_SHIFT);
 	} else {
 		struct ida_bitmap *bitmap = entry;
 
diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index 4dffad9284e0..59081122c63d 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -11,6 +11,7 @@
  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  * more details.
  */
+#include <linux/bitmap.h>
 #include <linux/idr.h>
 #include <linux/slab.h>
 #include <linux/kernel.h>
@@ -214,7 +215,7 @@ void ida_check_nomem(void)
 	DEFINE_IDA(ida);
 	int id, err;
 
-	err = ida_get_new(&ida, &id);
+	err = ida_get_new_above(&ida, 256, &id);
 	assert(err == -EAGAIN);
 	err = ida_get_new_above(&ida, 1UL << 30, &id);
 	assert(err == -EAGAIN);
@@ -246,6 +247,66 @@ void ida_check_leaf(void)
 	assert(ida_is_empty(&ida));
 }
 
+/*
+ * Check handling of conversions between exceptional entries and full bitmaps.
+ */
+void ida_check_conv(void)
+{
+	DEFINE_IDA(ida);
+	int id;
+	unsigned long i;
+
+	for (i = 0; i < IDA_BITMAP_BITS * 2; i += IDA_BITMAP_BITS) {
+		assert(ida_pre_get(&ida, GFP_KERNEL));
+		assert(!ida_get_new_above(&ida, i + 1, &id));
+		assert(id == i + 1);
+		assert(!ida_get_new_above(&ida, i + BITS_PER_LONG, &id));
+		assert(id == i + BITS_PER_LONG);
+		ida_remove(&ida, i + 1);
+		ida_remove(&ida, i + BITS_PER_LONG);
+		assert(ida_is_empty(&ida));
+	}
+
+	assert(ida_pre_get(&ida, GFP_KERNEL));
+
+	for (i = 0; i < IDA_BITMAP_BITS * 2; i++) {
+		assert(ida_pre_get(&ida, GFP_KERNEL));
+		assert(!ida_get_new(&ida, &id));
+		assert(id == i);
+	}
+
+	for (i = IDA_BITMAP_BITS * 2; i > 0; i--) {
+		ida_remove(&ida, i - 1);
+	}
+	assert(ida_is_empty(&ida));
+
+	for (i = 0; i < IDA_BITMAP_BITS + BITS_PER_LONG - 4; i++) {
+		assert(ida_pre_get(&ida, GFP_KERNEL));
+		assert(!ida_get_new(&ida, &id));
+		assert(id == i);
+	}
+
+	for (i = IDA_BITMAP_BITS + BITS_PER_LONG - 4; i > 0; i--) {
+		ida_remove(&ida, i - 1);
+	}
+	assert(ida_is_empty(&ida));
+
+	radix_tree_cpu_dead(1);
+	for (i = 0; i < 1000000; i++) {
+		int err = ida_get_new(&ida, &id);
+		if (err == -EAGAIN) {
+			assert((i % IDA_BITMAP_BITS) == (BITS_PER_LONG - 2));
+			assert(ida_pre_get(&ida, GFP_KERNEL));
+			err = ida_get_new(&ida, &id);
+		} else {
+			assert((i % IDA_BITMAP_BITS) != (BITS_PER_LONG - 2));
+		}
+		assert(!err);
+		assert(id == i);
+	}
+	ida_destroy(&ida);
+}
+
 /*
  * Check allocations up to and slightly above the maximum allowed (2^31-1) ID.
  * Allocating up to 2^31-1 should succeed, and then allocating the next one
@@ -273,6 +334,34 @@ void ida_check_max(void)
 	}
 }
 
+void ida_check_random(void)
+{
+	DEFINE_IDA(ida);
+	DECLARE_BITMAP(bitmap, 2048);
+	int id;
+	unsigned int i;
+	time_t s = time(NULL);
+
+ repeat:
+	memset(bitmap, 0, sizeof(bitmap));
+	for (i = 0; i < 100000; i++) {
+		int i = rand();
+		int bit = i & 2047;
+		if (test_bit(bit, bitmap)) {
+			__clear_bit(bit, bitmap);
+			ida_remove(&ida, bit);
+		} else {
+			__set_bit(bit, bitmap);
+			ida_pre_get(&ida, GFP_KERNEL);
+			assert(!ida_get_new_above(&ida, bit, &id));
+			assert(id == bit);
+		}
+	}
+	ida_destroy(&ida);
+	if (time(NULL) < s + 10)
+		goto repeat;
+}
+
 void ida_checks(void)
 {
 	DEFINE_IDA(ida);
@@ -337,6 +426,8 @@ void ida_checks(void)
 
 	ida_check_leaf();
 	ida_check_max();
+	ida_check_conv();
+	ida_check_random();
 
 	radix_tree_cpu_dead(1);
 }
-- 
cgit v1.2.3


From 8ac04868315c6ffcb2c5a5ad9cd5cec61cad3576 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Sun, 18 Dec 2016 22:56:05 -0500
Subject: radix tree test suite: Build separate binaries for some tests

To allow developers to run a subset of tests, build separate multiorder
and idr-test binaries which will run just the tests in those files.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Reviewed-by: Rehas Sachdeva <aquannie@gmail.com>
---
 tools/testing/radix-tree/.gitignore   |  4 +++-
 tools/testing/radix-tree/Makefile     | 18 ++++++++++++------
 tools/testing/radix-tree/idr-test.c   | 11 +++++++++++
 tools/testing/radix-tree/multiorder.c |  7 +++++++
 4 files changed, 33 insertions(+), 7 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/radix-tree/.gitignore b/tools/testing/radix-tree/.gitignore
index 3b5534b643f0..26dedaf57aab 100644
--- a/tools/testing/radix-tree/.gitignore
+++ b/tools/testing/radix-tree/.gitignore
@@ -1,3 +1,5 @@
+idr.c
+idr-test
 main
+multiorder
 radix-tree.c
-idr.c
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index 3597a3a9f269..4aba7b75e43a 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -1,10 +1,10 @@
 
 CFLAGS += -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE
 LDFLAGS += -lpthread -lurcu
-TARGETS = main
-OFILES = main.o radix-tree.o idr.o linux.o test.o tag_check.o find_bit.o \
-	 regression1.o regression2.o regression3.o multiorder.o idr-test.o \
-	 iteration_check.o benchmark.o
+TARGETS = main idr-test multiorder
+CORE_OFILES := radix-tree.o idr.o linux.o test.o find_bit.o
+OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \
+	 tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o
 
 ifdef BENCHMARK
 	CFLAGS += -DBENCHMARK=1
@@ -13,10 +13,16 @@ endif
 targets: $(TARGETS)
 
 main:	$(OFILES)
-	$(CC) $(CFLAGS) $(LDFLAGS) $(OFILES) -o main
+	$(CC) $(CFLAGS) $(LDFLAGS) $^ -o main
+
+idr-test: idr-test.o $(CORE_OFILES)
+	$(CC) $(CFLAGS) $(LDFLAGS) $^ -o idr-test
+
+multiorder: multiorder.o $(CORE_OFILES)
+	$(CC) $(CFLAGS) $(LDFLAGS) $^ -o multiorder
 
 clean:
-	$(RM) -f $(TARGETS) *.o radix-tree.c
+	$(RM) $(TARGETS) *.o radix-tree.c idr.c
 
 vpath %.c ../../lib
 
diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index 59081122c63d..a26098c6123d 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -431,3 +431,14 @@ void ida_checks(void)
 
 	radix_tree_cpu_dead(1);
 }
+
+int __weak main(void)
+{
+	radix_tree_init();
+	idr_checks();
+	ida_checks();
+	rcu_barrier();
+	if (nr_allocated)
+		printf("nr_allocated = %d\n", nr_allocated);
+	return 0;
+}
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
index f79812a5e070..e465a3811b97 100644
--- a/tools/testing/radix-tree/multiorder.c
+++ b/tools/testing/radix-tree/multiorder.c
@@ -633,3 +633,10 @@ void multiorder_checks(void)
 
 	radix_tree_cpu_dead(0);
 }
+
+int __weak main(void)
+{
+	radix_tree_init();
+	multiorder_checks();
+	return 0;
+}
-- 
cgit v1.2.3


From 5eeb2d23df29212f901a36dabf16f93d8bd50814 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Sat, 24 Dec 2016 07:49:18 -0500
Subject: radix tree test suite: Introduce kmalloc_verbose

To help track down where memory leaks may be, add the ability to turn
on/off printing allocations, frees and delayed frees.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 tools/testing/radix-tree/linux.c            |  9 +++++++++
 tools/testing/radix-tree/linux/radix-tree.h | 17 +++++++++++++++++
 2 files changed, 26 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c
index 93f06614e91b..3e4f9f36da2e 100644
--- a/tools/testing/radix-tree/linux.c
+++ b/tools/testing/radix-tree/linux.c
@@ -13,6 +13,7 @@
 
 int nr_allocated;
 int preempt_count;
+int kmalloc_verbose;
 
 struct kmem_cache {
 	pthread_mutex_t lock;
@@ -44,6 +45,8 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, int flags)
 	}
 
 	uatomic_inc(&nr_allocated);
+	if (kmalloc_verbose)
+		printf("Allocating %p from slab\n", node);
 	return node;
 }
 
@@ -51,6 +54,8 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
 {
 	assert(objp);
 	uatomic_dec(&nr_allocated);
+	if (kmalloc_verbose)
+		printf("Freeing %p to slab\n", objp);
 	pthread_mutex_lock(&cachep->lock);
 	if (cachep->nr_objs > 10) {
 		memset(objp, POISON_FREE, cachep->size);
@@ -68,6 +73,8 @@ void *kmalloc(size_t size, gfp_t gfp)
 {
 	void *ret = malloc(size);
 	uatomic_inc(&nr_allocated);
+	if (kmalloc_verbose)
+		printf("Allocating %p from malloc\n", ret);
 	return ret;
 }
 
@@ -76,6 +83,8 @@ void kfree(void *p)
 	if (!p)
 		return;
 	uatomic_dec(&nr_allocated);
+	if (kmalloc_verbose)
+		printf("Freeing %p to malloc\n", p);
 	free(p);
 }
 
diff --git a/tools/testing/radix-tree/linux/radix-tree.h b/tools/testing/radix-tree/linux/radix-tree.h
index ce694ddd4aea..f4d8532e1bef 100644
--- a/tools/testing/radix-tree/linux/radix-tree.h
+++ b/tools/testing/radix-tree/linux/radix-tree.h
@@ -1 +1,18 @@
+#ifndef _TEST_RADIX_TREE_H
+#define _TEST_RADIX_TREE_H
 #include "../../../../include/linux/radix-tree.h"
+
+extern int kmalloc_verbose;
+
+static inline void trace_call_rcu(struct rcu_head *head,
+		void (*func)(struct rcu_head *head))
+{
+	if (kmalloc_verbose)
+		printf("Delaying free of %p to slab\n", (char *)head -
+				offsetof(struct radix_tree_node, rcu_head));
+	call_rcu(head, func);
+}
+#undef call_rcu
+#define call_rcu(x, y) trace_call_rcu(x, y)
+
+#endif /* _TEST_RADIX_TREE_H */
-- 
cgit v1.2.3


From 73bc029b76482260a144219786d19951f561716e Mon Sep 17 00:00:00 2001
From: Rehas Sachdeva <aquannie@gmail.com>
Date: Wed, 4 Jan 2017 11:55:00 -0500
Subject: radix tree test suite: Dial down verbosity with -v

Make the output of radix tree test suite less verbose by default and add
-v and -vv command line options for increasing level of verbosity.

Signed-off-by: Rehas Sachdeva <aquannie@gmail.com>
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 tools/testing/radix-tree/benchmark.c        |  6 ++--
 tools/testing/radix-tree/iteration_check.c  |  2 +-
 tools/testing/radix-tree/linux.c            |  1 +
 tools/testing/radix-tree/linux/radix-tree.h |  6 ++++
 tools/testing/radix-tree/main.c             | 45 ++++++++++++++++++-----------
 tools/testing/radix-tree/multiorder.c       | 17 ++++++-----
 tools/testing/radix-tree/regression1.c      |  4 +--
 tools/testing/radix-tree/regression2.c      |  4 +--
 tools/testing/radix-tree/regression3.c      | 28 +++++++++---------
 tools/testing/radix-tree/tag_check.c        | 22 +++++++-------
 10 files changed, 77 insertions(+), 58 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/radix-tree/benchmark.c b/tools/testing/radix-tree/benchmark.c
index 215ca86c7605..9b09ddfe462f 100644
--- a/tools/testing/radix-tree/benchmark.c
+++ b/tools/testing/radix-tree/benchmark.c
@@ -71,7 +71,7 @@ static void benchmark_size(unsigned long size, unsigned long step, int order)
 	tagged = benchmark_iter(&tree, true);
 	normal = benchmark_iter(&tree, false);
 
-	printf("Size %ld, step %6ld, order %d tagged %10lld ns, normal %10lld ns\n",
+	printv(2, "Size %ld, step %6ld, order %d tagged %10lld ns, normal %10lld ns\n",
 		size, step, order, tagged, normal);
 
 	item_kill_tree(&tree);
@@ -85,8 +85,8 @@ void benchmark(void)
 				128, 256, 512, 12345, 0};
 	int c, s;
 
-	printf("starting benchmarks\n");
-	printf("RADIX_TREE_MAP_SHIFT = %d\n", RADIX_TREE_MAP_SHIFT);
+	printv(1, "starting benchmarks\n");
+	printv(1, "RADIX_TREE_MAP_SHIFT = %d\n", RADIX_TREE_MAP_SHIFT);
 
 	for (c = 0; size[c]; c++)
 		for (s = 0; step[s]; s++)
diff --git a/tools/testing/radix-tree/iteration_check.c b/tools/testing/radix-tree/iteration_check.c
index 7572b7ed930e..a92bab513701 100644
--- a/tools/testing/radix-tree/iteration_check.c
+++ b/tools/testing/radix-tree/iteration_check.c
@@ -177,7 +177,7 @@ void iteration_test(unsigned order, unsigned test_duration)
 {
 	int i;
 
-	printf("Running %siteration tests for %d seconds\n",
+	printv(1, "Running %siteration tests for %d seconds\n",
 			order > 0 ? "multiorder " : "", test_duration);
 
 	max_order = order;
diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c
index 3e4f9f36da2e..94bcdb992bbf 100644
--- a/tools/testing/radix-tree/linux.c
+++ b/tools/testing/radix-tree/linux.c
@@ -14,6 +14,7 @@
 int nr_allocated;
 int preempt_count;
 int kmalloc_verbose;
+int test_verbose;
 
 struct kmem_cache {
 	pthread_mutex_t lock;
diff --git a/tools/testing/radix-tree/linux/radix-tree.h b/tools/testing/radix-tree/linux/radix-tree.h
index f4d8532e1bef..ddd135fa3af7 100644
--- a/tools/testing/radix-tree/linux/radix-tree.h
+++ b/tools/testing/radix-tree/linux/radix-tree.h
@@ -3,6 +3,7 @@
 #include "../../../../include/linux/radix-tree.h"
 
 extern int kmalloc_verbose;
+extern int test_verbose;
 
 static inline void trace_call_rcu(struct rcu_head *head,
 		void (*func)(struct rcu_head *head))
@@ -12,6 +13,11 @@ static inline void trace_call_rcu(struct rcu_head *head,
 				offsetof(struct radix_tree_node, rcu_head));
 	call_rcu(head, func);
 }
+
+#define printv(verbosity_level, fmt, ...) \
+	if(test_verbose >= verbosity_level) \
+		printf(fmt, ##__VA_ARGS__)
+
 #undef call_rcu
 #define call_rcu(x, y) trace_call_rcu(x, y)
 
diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c
index ddd90a11db3f..86de448b699e 100644
--- a/tools/testing/radix-tree/main.c
+++ b/tools/testing/radix-tree/main.c
@@ -68,7 +68,7 @@ void big_gang_check(bool long_run)
 
 	for (i = 0; i < (long_run ? 1000 : 3); i++) {
 		__big_gang_check();
-		printf("%d ", i);
+		printv(2, "%d ", i);
 		fflush(stdout);
 	}
 }
@@ -129,14 +129,19 @@ void check_copied_tags(struct radix_tree_root *tree, unsigned long start, unsign
 			putchar('.'); */
 		if (idx[i] < start || idx[i] > end) {
 			if (item_tag_get(tree, idx[i], totag)) {
-				printf("%lu-%lu: %lu, tags %d-%d\n", start, end, idx[i], item_tag_get(tree, idx[i], fromtag), item_tag_get(tree, idx[i], totag));
+				printv(2, "%lu-%lu: %lu, tags %d-%d\n", start,
+				       end, idx[i], item_tag_get(tree, idx[i],
+								 fromtag),
+				       item_tag_get(tree, idx[i], totag));
 			}
 			assert(!item_tag_get(tree, idx[i], totag));
 			continue;
 		}
 		if (item_tag_get(tree, idx[i], fromtag) ^
 			item_tag_get(tree, idx[i], totag)) {
-			printf("%lu-%lu: %lu, tags %d-%d\n", start, end, idx[i], item_tag_get(tree, idx[i], fromtag), item_tag_get(tree, idx[i], totag));
+			printv(2, "%lu-%lu: %lu, tags %d-%d\n", start, end,
+			       idx[i], item_tag_get(tree, idx[i], fromtag),
+			       item_tag_get(tree, idx[i], totag));
 		}
 		assert(!(item_tag_get(tree, idx[i], fromtag) ^
 			 item_tag_get(tree, idx[i], totag)));
@@ -238,7 +243,7 @@ static void __locate_check(struct radix_tree_root *tree, unsigned long index,
 	item = item_lookup(tree, index);
 	index2 = find_item(tree, item);
 	if (index != index2) {
-		printf("index %ld order %d inserted; found %ld\n",
+		printv(2, "index %ld order %d inserted; found %ld\n",
 			index, order, index2);
 		abort();
 	}
@@ -289,48 +294,48 @@ static void single_thread_tests(bool long_run)
 {
 	int i;
 
-	printf("starting single_thread_tests: %d allocated, preempt %d\n",
+	printv(1, "starting single_thread_tests: %d allocated, preempt %d\n",
 		nr_allocated, preempt_count);
 	multiorder_checks();
 	rcu_barrier();
-	printf("after multiorder_check: %d allocated, preempt %d\n",
+	printv(2, "after multiorder_check: %d allocated, preempt %d\n",
 		nr_allocated, preempt_count);
 	locate_check();
 	rcu_barrier();
-	printf("after locate_check: %d allocated, preempt %d\n",
+	printv(2, "after locate_check: %d allocated, preempt %d\n",
 		nr_allocated, preempt_count);
 	tag_check();
 	rcu_barrier();
-	printf("after tag_check: %d allocated, preempt %d\n",
+	printv(2, "after tag_check: %d allocated, preempt %d\n",
 		nr_allocated, preempt_count);
 	gang_check();
 	rcu_barrier();
-	printf("after gang_check: %d allocated, preempt %d\n",
+	printv(2, "after gang_check: %d allocated, preempt %d\n",
 		nr_allocated, preempt_count);
 	add_and_check();
 	rcu_barrier();
-	printf("after add_and_check: %d allocated, preempt %d\n",
+	printv(2, "after add_and_check: %d allocated, preempt %d\n",
 		nr_allocated, preempt_count);
 	dynamic_height_check();
 	rcu_barrier();
-	printf("after dynamic_height_check: %d allocated, preempt %d\n",
+	printv(2, "after dynamic_height_check: %d allocated, preempt %d\n",
 		nr_allocated, preempt_count);
 	idr_checks();
 	ida_checks();
 	rcu_barrier();
-	printf("after idr_checks: %d allocated, preempt %d\n",
+	printv(2, "after idr_checks: %d allocated, preempt %d\n",
 		nr_allocated, preempt_count);
 	big_gang_check(long_run);
 	rcu_barrier();
-	printf("after big_gang_check: %d allocated, preempt %d\n",
+	printv(2, "after big_gang_check: %d allocated, preempt %d\n",
 		nr_allocated, preempt_count);
 	for (i = 0; i < (long_run ? 2000 : 3); i++) {
 		copy_tag_check();
-		printf("%d ", i);
+		printv(2, "%d ", i);
 		fflush(stdout);
 	}
 	rcu_barrier();
-	printf("after copy_tag_check: %d allocated, preempt %d\n",
+	printv(2, "after copy_tag_check: %d allocated, preempt %d\n",
 		nr_allocated, preempt_count);
 }
 
@@ -340,16 +345,20 @@ int main(int argc, char **argv)
 	int opt;
 	unsigned int seed = time(NULL);
 
-	while ((opt = getopt(argc, argv, "ls:")) != -1) {
+	while ((opt = getopt(argc, argv, "ls:v")) != -1) {
 		if (opt == 'l')
 			long_run = true;
 		else if (opt == 's')
 			seed = strtoul(optarg, NULL, 0);
+		else if (opt == 'v')
+			test_verbose++;
 	}
 
 	printf("random seed %u\n", seed);
 	srand(seed);
 
+	printf("running tests\n");
+
 	rcu_register_thread();
 	radix_tree_init();
 
@@ -366,9 +375,11 @@ int main(int argc, char **argv)
 	benchmark();
 
 	rcu_barrier();
-	printf("after rcu_barrier: %d allocated, preempt %d\n",
+	printv(2, "after rcu_barrier: %d allocated, preempt %d\n",
 		nr_allocated, preempt_count);
 	rcu_unregister_thread();
 
+	printf("tests completed\n");
+
 	exit(0);
 }
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
index e465a3811b97..72d80f7059d3 100644
--- a/tools/testing/radix-tree/multiorder.c
+++ b/tools/testing/radix-tree/multiorder.c
@@ -30,7 +30,7 @@ static void __multiorder_tag_test(int index, int order)
 	/* our canonical entry */
 	base = index & ~((1 << order) - 1);
 
-	printf("Multiorder tag test with index %d, canonical entry %d\n",
+	printv(2, "Multiorder tag test with index %d, canonical entry %d\n",
 			index, base);
 
 	err = item_insert_order(&tree, index, order);
@@ -150,7 +150,7 @@ static void multiorder_check(unsigned long index, int order)
 	struct item *item2 = item_create(min, order);
 	RADIX_TREE(tree, GFP_KERNEL);
 
-	printf("Multiorder index %ld, order %d\n", index, order);
+	printv(2, "Multiorder index %ld, order %d\n", index, order);
 
 	assert(item_insert_order(&tree, index, order) == 0);
 
@@ -188,7 +188,7 @@ static void multiorder_shrink(unsigned long index, int order)
 	RADIX_TREE(tree, GFP_KERNEL);
 	struct radix_tree_node *node;
 
-	printf("Multiorder shrink index %ld, order %d\n", index, order);
+	printv(2, "Multiorder shrink index %ld, order %d\n", index, order);
 
 	assert(item_insert_order(&tree, 0, order) == 0);
 
@@ -209,7 +209,8 @@ static void multiorder_shrink(unsigned long index, int order)
 		item_check_absent(&tree, i);
 
 	if (!item_delete(&tree, 0)) {
-		printf("failed to delete index %ld (order %d)\n", index, order);		abort();
+		printv(2, "failed to delete index %ld (order %d)\n", index, order);
+		abort();
 	}
 
 	for (i = 0; i < 2*max; i++)
@@ -234,7 +235,7 @@ void multiorder_iteration(void)
 	void **slot;
 	int i, j, err;
 
-	printf("Multiorder iteration test\n");
+	printv(1, "Multiorder iteration test\n");
 
 #define NUM_ENTRIES 11
 	int index[NUM_ENTRIES] = {0, 2, 4, 8, 16, 32, 34, 36, 64, 72, 128};
@@ -275,7 +276,7 @@ void multiorder_tagged_iteration(void)
 	void **slot;
 	int i, j;
 
-	printf("Multiorder tagged iteration test\n");
+	printv(1, "Multiorder tagged iteration test\n");
 
 #define MT_NUM_ENTRIES 9
 	int index[MT_NUM_ENTRIES] = {0, 2, 4, 16, 32, 40, 64, 72, 128};
@@ -453,7 +454,7 @@ static void check_mem(unsigned old_order, unsigned new_order, unsigned alloc)
 {
 	struct radix_tree_preload *rtp = &radix_tree_preloads;
 	if (rtp->nr != 0)
-		printf("split(%u %u) remaining %u\n", old_order, new_order,
+		printv(2, "split(%u %u) remaining %u\n", old_order, new_order,
 							rtp->nr);
 	/*
 	 * Can't check for equality here as some nodes may have been
@@ -461,7 +462,7 @@ static void check_mem(unsigned old_order, unsigned new_order, unsigned alloc)
 	 * nodes allocated since they should have all been preloaded.
 	 */
 	if (nr_allocated > alloc)
-		printf("split(%u %u) allocated %u %u\n", old_order, new_order,
+		printv(2, "split(%u %u) allocated %u %u\n", old_order, new_order,
 							alloc, nr_allocated);
 }
 
diff --git a/tools/testing/radix-tree/regression1.c b/tools/testing/radix-tree/regression1.c
index 0d6813a61b37..bf97742fc18c 100644
--- a/tools/testing/radix-tree/regression1.c
+++ b/tools/testing/radix-tree/regression1.c
@@ -193,7 +193,7 @@ void regression1_test(void)
 	long arg;
 
 	/* Regression #1 */
-	printf("running regression test 1, should finish in under a minute\n");
+	printv(1, "running regression test 1, should finish in under a minute\n");
 	nr_threads = 2;
 	pthread_barrier_init(&worker_barrier, NULL, nr_threads);
 
@@ -216,5 +216,5 @@ void regression1_test(void)
 
 	free(threads);
 
-	printf("regression test 1, done\n");
+	printv(1, "regression test 1, done\n");
 }
diff --git a/tools/testing/radix-tree/regression2.c b/tools/testing/radix-tree/regression2.c
index a41325d7a170..a24d1beec7c8 100644
--- a/tools/testing/radix-tree/regression2.c
+++ b/tools/testing/radix-tree/regression2.c
@@ -80,7 +80,7 @@ void regression2_test(void)
 	unsigned long int start, end;
 	struct page *pages[1];
 
-	printf("running regression test 2 (should take milliseconds)\n");
+	printv(1, "running regression test 2 (should take milliseconds)\n");
 	/* 0. */
 	for (i = 0; i <= max_slots - 1; i++) {
 		p = page_alloc();
@@ -116,5 +116,5 @@ void regression2_test(void)
 	/* We remove all the remained nodes */
 	radix_tree_delete(&mt_tree, max_slots);
 
-	printf("regression test 2, done\n");
+	printv(1, "regression test 2, done\n");
 }
diff --git a/tools/testing/radix-tree/regression3.c b/tools/testing/radix-tree/regression3.c
index b594841fae85..670c3d2ae7b1 100644
--- a/tools/testing/radix-tree/regression3.c
+++ b/tools/testing/radix-tree/regression3.c
@@ -34,21 +34,21 @@ void regression3_test(void)
 	void **slot;
 	bool first;
 
-	printf("running regression test 3 (should take milliseconds)\n");
+	printv(1, "running regression test 3 (should take milliseconds)\n");
 
 	radix_tree_insert(&root, 0, ptr0);
 	radix_tree_tag_set(&root, 0, 0);
 
 	first = true;
 	radix_tree_for_each_tagged(slot, &root, &iter, 0, 0) {
-		printf("tagged %ld %p\n", iter.index, *slot);
+		printv(2, "tagged %ld %p\n", iter.index, *slot);
 		if (first) {
 			radix_tree_insert(&root, 1, ptr);
 			radix_tree_tag_set(&root, 1, 0);
 			first = false;
 		}
 		if (radix_tree_deref_retry(*slot)) {
-			printf("retry at %ld\n", iter.index);
+			printv(2, "retry at %ld\n", iter.index);
 			slot = radix_tree_iter_retry(&iter);
 			continue;
 		}
@@ -57,13 +57,13 @@ void regression3_test(void)
 
 	first = true;
 	radix_tree_for_each_slot(slot, &root, &iter, 0) {
-		printf("slot %ld %p\n", iter.index, *slot);
+		printv(2, "slot %ld %p\n", iter.index, *slot);
 		if (first) {
 			radix_tree_insert(&root, 1, ptr);
 			first = false;
 		}
 		if (radix_tree_deref_retry(*slot)) {
-			printk("retry at %ld\n", iter.index);
+			printv(2, "retry at %ld\n", iter.index);
 			slot = radix_tree_iter_retry(&iter);
 			continue;
 		}
@@ -72,30 +72,30 @@ void regression3_test(void)
 
 	first = true;
 	radix_tree_for_each_contig(slot, &root, &iter, 0) {
-		printk("contig %ld %p\n", iter.index, *slot);
+		printv(2, "contig %ld %p\n", iter.index, *slot);
 		if (first) {
 			radix_tree_insert(&root, 1, ptr);
 			first = false;
 		}
 		if (radix_tree_deref_retry(*slot)) {
-			printk("retry at %ld\n", iter.index);
+			printv(2, "retry at %ld\n", iter.index);
 			slot = radix_tree_iter_retry(&iter);
 			continue;
 		}
 	}
 
 	radix_tree_for_each_slot(slot, &root, &iter, 0) {
-		printf("slot %ld %p\n", iter.index, *slot);
+		printv(2, "slot %ld %p\n", iter.index, *slot);
 		if (!iter.index) {
-			printf("next at %ld\n", iter.index);
+			printv(2, "next at %ld\n", iter.index);
 			slot = radix_tree_iter_resume(slot, &iter);
 		}
 	}
 
 	radix_tree_for_each_contig(slot, &root, &iter, 0) {
-		printf("contig %ld %p\n", iter.index, *slot);
+		printv(2, "contig %ld %p\n", iter.index, *slot);
 		if (!iter.index) {
-			printf("next at %ld\n", iter.index);
+			printv(2, "next at %ld\n", iter.index);
 			slot = radix_tree_iter_resume(slot, &iter);
 		}
 	}
@@ -103,9 +103,9 @@ void regression3_test(void)
 	radix_tree_tag_set(&root, 0, 0);
 	radix_tree_tag_set(&root, 1, 0);
 	radix_tree_for_each_tagged(slot, &root, &iter, 0, 0) {
-		printf("tagged %ld %p\n", iter.index, *slot);
+		printv(2, "tagged %ld %p\n", iter.index, *slot);
 		if (!iter.index) {
-			printf("next at %ld\n", iter.index);
+			printv(2, "next at %ld\n", iter.index);
 			slot = radix_tree_iter_resume(slot, &iter);
 		}
 	}
@@ -113,5 +113,5 @@ void regression3_test(void)
 	radix_tree_delete(&root, 0);
 	radix_tree_delete(&root, 1);
 
-	printf("regression test 3 passed\n");
+	printv(1, "regression test 3 passed\n");
 }
diff --git a/tools/testing/radix-tree/tag_check.c b/tools/testing/radix-tree/tag_check.c
index fd98c132207a..d4ff00989245 100644
--- a/tools/testing/radix-tree/tag_check.c
+++ b/tools/testing/radix-tree/tag_check.c
@@ -49,10 +49,10 @@ void simple_checks(void)
 	}
 	verify_tag_consistency(&tree, 0);
 	verify_tag_consistency(&tree, 1);
-	printf("before item_kill_tree: %d allocated\n", nr_allocated);
+	printv(2, "before item_kill_tree: %d allocated\n", nr_allocated);
 	item_kill_tree(&tree);
 	rcu_barrier();
-	printf("after item_kill_tree: %d allocated\n", nr_allocated);
+	printv(2, "after item_kill_tree: %d allocated\n", nr_allocated);
 }
 
 /*
@@ -257,7 +257,7 @@ static void do_thrash(struct radix_tree_root *tree, char *thrash_state, int tag)
 
 		gang_check(tree, thrash_state, tag);
 
-		printf("%d(%d) %d(%d) %d(%d) %d(%d) / "
+		printv(2, "%d(%d) %d(%d) %d(%d) %d(%d) / "
 				"%d(%d) present, %d(%d) tagged\n",
 			insert_chunk, nr_inserted,
 			delete_chunk, nr_deleted,
@@ -296,13 +296,13 @@ static void __leak_check(void)
 {
 	RADIX_TREE(tree, GFP_KERNEL);
 
-	printf("%d: nr_allocated=%d\n", __LINE__, nr_allocated);
+	printv(2, "%d: nr_allocated=%d\n", __LINE__, nr_allocated);
 	item_insert(&tree, 1000000);
-	printf("%d: nr_allocated=%d\n", __LINE__, nr_allocated);
+	printv(2, "%d: nr_allocated=%d\n", __LINE__, nr_allocated);
 	item_delete(&tree, 1000000);
-	printf("%d: nr_allocated=%d\n", __LINE__, nr_allocated);
+	printv(2, "%d: nr_allocated=%d\n", __LINE__, nr_allocated);
 	item_kill_tree(&tree);
-	printf("%d: nr_allocated=%d\n", __LINE__, nr_allocated);
+	printv(2, "%d: nr_allocated=%d\n", __LINE__, nr_allocated);
 }
 
 static void single_check(void)
@@ -336,15 +336,15 @@ void tag_check(void)
 	extend_checks();
 	contract_checks();
 	rcu_barrier();
-	printf("after extend_checks: %d allocated\n", nr_allocated);
+	printv(2, "after extend_checks: %d allocated\n", nr_allocated);
 	__leak_check();
 	leak_check();
 	rcu_barrier();
-	printf("after leak_check: %d allocated\n", nr_allocated);
+	printv(2, "after leak_check: %d allocated\n", nr_allocated);
 	simple_checks();
 	rcu_barrier();
-	printf("after simple_checks: %d allocated\n", nr_allocated);
+	printv(2, "after simple_checks: %d allocated\n", nr_allocated);
 	thrash_tags();
 	rcu_barrier();
-	printf("after thrash_tags: %d allocated\n", nr_allocated);
+	printv(2, "after thrash_tags: %d allocated\n", nr_allocated);
 }
-- 
cgit v1.2.3


From 1293d5c5f54d5118fbb34fc94e01ba02fcd25fc1 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Mon, 16 Jan 2017 16:41:29 -0500
Subject: radix-tree: Chain preallocated nodes through ->parent

Chaining through the ->private_data member means we have to zero
->private_data after removing preallocated nodes from the list.
We're about to initialise ->parent anyway, so we can avoid zeroing it.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 lib/radix-tree.c                 | 9 ++++-----
 tools/testing/radix-tree/linux.c | 6 +++---
 2 files changed, 7 insertions(+), 8 deletions(-)

(limited to 'tools/testing')

diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 14130ab197c0..66c71312c381 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -82,7 +82,7 @@ static struct kmem_cache *radix_tree_node_cachep;
  */
 struct radix_tree_preload {
 	unsigned nr;
-	/* nodes->private_data points to next preallocated node */
+	/* nodes->parent points to next preallocated node */
 	struct radix_tree_node *nodes;
 };
 static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
@@ -405,8 +405,7 @@ radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent,
 		rtp = this_cpu_ptr(&radix_tree_preloads);
 		if (rtp->nr) {
 			ret = rtp->nodes;
-			rtp->nodes = ret->private_data;
-			ret->private_data = NULL;
+			rtp->nodes = ret->parent;
 			rtp->nr--;
 		}
 		/*
@@ -483,7 +482,7 @@ static int __radix_tree_preload(gfp_t gfp_mask, unsigned nr)
 		preempt_disable();
 		rtp = this_cpu_ptr(&radix_tree_preloads);
 		if (rtp->nr < nr) {
-			node->private_data = rtp->nodes;
+			node->parent = rtp->nodes;
 			rtp->nodes = node;
 			rtp->nr++;
 		} else {
@@ -2260,7 +2259,7 @@ static int radix_tree_cpu_dead(unsigned int cpu)
 	rtp = &per_cpu(radix_tree_preloads, cpu);
 	while (rtp->nr) {
 		node = rtp->nodes;
-		rtp->nodes = node->private_data;
+		rtp->nodes = node->parent;
 		kmem_cache_free(radix_tree_node_cachep, node);
 		rtp->nr--;
 	}
diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c
index 94bcdb992bbf..cf48c8473f48 100644
--- a/tools/testing/radix-tree/linux.c
+++ b/tools/testing/radix-tree/linux.c
@@ -35,9 +35,9 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, int flags)
 	if (cachep->nr_objs) {
 		cachep->nr_objs--;
 		node = cachep->objs;
-		cachep->objs = node->private_data;
+		cachep->objs = node->parent;
 		pthread_mutex_unlock(&cachep->lock);
-		node->private_data = NULL;
+		node->parent = NULL;
 	} else {
 		pthread_mutex_unlock(&cachep->lock);
 		node = malloc(cachep->size);
@@ -64,7 +64,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
 	} else {
 		struct radix_tree_node *node = objp;
 		cachep->nr_objs++;
-		node->private_data = cachep->objs;
+		node->parent = cachep->objs;
 		cachep->objs = node;
 	}
 	pthread_mutex_unlock(&cachep->lock);
-- 
cgit v1.2.3


From 829f83d3653ef7105187b0110887dbfd46a30ce4 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Sat, 28 Jan 2017 10:03:48 -0500
Subject: radix tree test suite: Enable address sanitizer

I was looking for a memory scribble and instead found a pile of memory
leaks.  Ensure no more occur in future.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Reviewed-by: Rehas Sachdeva <aquannie@gmail.com>
---
 tools/testing/radix-tree/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index 4aba7b75e43a..ecea846e7660 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -1,5 +1,5 @@
 
-CFLAGS += -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE
+CFLAGS += -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE -fsanitize=address
 LDFLAGS += -lpthread -lurcu
 TARGETS = main idr-test multiorder
 CORE_OFILES := radix-tree.o idr.o linux.o test.o find_bit.o
-- 
cgit v1.2.3


From 18d0c57394e42ff536e5fdc776b6b217fdd9889c Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Sun, 29 Jan 2017 01:48:34 -0500
Subject: radix tree test suite: Fix leaky tests

If item_insert() or item_insert_order() failed to insert an item, they
would leak the item they had just created.  This was causing runaway
memory consumption while running the iteration_check testcase, which
proves that Ross has too much memory in his workstation ;-)

Make sure to free the item on error.  Found with -fsanitize=address.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Reviewed-by: Rehas Sachdeva <aquannie@gmail.com>
---
 tools/testing/radix-tree/test.c | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c
index e5726e373646..1a257d738a1e 100644
--- a/tools/testing/radix-tree/test.c
+++ b/tools/testing/radix-tree/test.c
@@ -29,15 +29,28 @@ int __item_insert(struct radix_tree_root *root, struct item *item)
 	return __radix_tree_insert(root, item->index, item->order, item);
 }
 
-int item_insert(struct radix_tree_root *root, unsigned long index)
+struct item *item_create(unsigned long index, unsigned int order)
 {
-	return __item_insert(root, item_create(index, 0));
+	struct item *ret = malloc(sizeof(*ret));
+
+	ret->index = index;
+	ret->order = order;
+	return ret;
 }
 
 int item_insert_order(struct radix_tree_root *root, unsigned long index,
 			unsigned order)
 {
-	return __item_insert(root, item_create(index, order));
+	struct item *item = item_create(index, order);
+	int err = __item_insert(root, item);
+	if (err)
+		free(item);
+	return err;
+}
+
+int item_insert(struct radix_tree_root *root, unsigned long index)
+{
+	return item_insert_order(root, index, 0);
 }
 
 void item_sanity(struct item *item, unsigned long index)
@@ -61,15 +74,6 @@ int item_delete(struct radix_tree_root *root, unsigned long index)
 	return 0;
 }
 
-struct item *item_create(unsigned long index, unsigned int order)
-{
-	struct item *ret = malloc(sizeof(*ret));
-
-	ret->index = index;
-	ret->order = order;
-	return ret;
-}
-
 void item_check_present(struct radix_tree_root *root, unsigned long index)
 {
 	struct item *item;
-- 
cgit v1.2.3


From 6da0396cac7692b6667c09382a746593fff90e6d Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Sun, 29 Jan 2017 01:52:55 -0500
Subject: radix tree test suite: Fix leaks in regression2.c

None of the malloc'ed data structures were ever being freed.  Found with
-fsanitize=address.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Reviewed-by: Rehas Sachdeva <aquannie@gmail.com>
---
 tools/testing/radix-tree/regression2.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/radix-tree/regression2.c b/tools/testing/radix-tree/regression2.c
index a24d1beec7c8..42dd2a33ed24 100644
--- a/tools/testing/radix-tree/regression2.c
+++ b/tools/testing/radix-tree/regression2.c
@@ -103,7 +103,7 @@ void regression2_test(void)
 
 	/* 4. */
 	for (i = max_slots - 1; i >= 0; i--)
-		radix_tree_delete(&mt_tree, i);
+		free(radix_tree_delete(&mt_tree, i));
 
 	/* 5. */
 	// NOTE: start should not be 0 because radix_tree_gang_lookup_tag_slot
@@ -114,7 +114,9 @@ void regression2_test(void)
 		PAGECACHE_TAG_TOWRITE);
 
 	/* We remove all the remained nodes */
-	radix_tree_delete(&mt_tree, max_slots);
+	free(radix_tree_delete(&mt_tree, max_slots));
+
+	BUG_ON(!radix_tree_empty(&mt_tree));
 
 	printv(1, "regression test 2, done\n");
 }
-- 
cgit v1.2.3


From 3b7869c31f9358a63e502c8c5c7664daf1c6d8b0 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Sun, 29 Jan 2017 02:00:31 -0500
Subject: radix tree test suite: Fix split/join memory leaks

The last of the memory leaks in the test suite was a couple of places in
the split/join testing where I forgot to free the element being removed
from the tree.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Reviewed-by: Rehas Sachdeva <aquannie@gmail.com>
---
 tools/testing/radix-tree/multiorder.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
index 72d80f7059d3..06c71178d07d 100644
--- a/tools/testing/radix-tree/multiorder.c
+++ b/tools/testing/radix-tree/multiorder.c
@@ -356,6 +356,10 @@ void multiorder_tagged_iteration(void)
 	item_kill_tree(&tree);
 }
 
+/*
+ * Basic join checks: make sure we can't find an entry in the tree after
+ * a larger entry has replaced it
+ */
 static void multiorder_join1(unsigned long index,
 				unsigned order1, unsigned order2)
 {
@@ -374,6 +378,10 @@ static void multiorder_join1(unsigned long index,
 	item_kill_tree(&tree);
 }
 
+/*
+ * Check that the accounting of exceptional entries is handled correctly
+ * by joining an exceptional entry to a normal pointer.
+ */
 static void multiorder_join2(unsigned order1, unsigned order2)
 {
 	RADIX_TREE(tree, GFP_KERNEL);
@@ -387,6 +395,9 @@ static void multiorder_join2(unsigned order1, unsigned order2)
 	assert(item2 == (void *)0x12UL);
 	assert(node->exceptional == 1);
 
+	item2 = radix_tree_lookup(&tree, 0);
+	free(item2);
+
 	radix_tree_join(&tree, 0, order1, item1);
 	item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL);
 	assert(item2 == item1);
@@ -472,6 +483,7 @@ static void __multiorder_split(int old_order, int new_order)
 	void **slot;
 	struct radix_tree_iter iter;
 	unsigned alloc;
+	struct item *item;
 
 	radix_tree_preload(GFP_KERNEL);
 	assert(item_insert_order(&tree, 0, old_order) == 0);
@@ -480,7 +492,7 @@ static void __multiorder_split(int old_order, int new_order)
 	/* Wipe out the preloaded cache or it'll confuse check_mem() */
 	radix_tree_cpu_dead(0);
 
-	radix_tree_tag_set(&tree, 0, 2);
+	item = radix_tree_tag_set(&tree, 0, 2);
 
 	radix_tree_split_preload(old_order, new_order, GFP_KERNEL);
 	alloc = nr_allocated;
@@ -493,6 +505,7 @@ static void __multiorder_split(int old_order, int new_order)
 	radix_tree_preload_end();
 
 	item_kill_tree(&tree);
+	free(item);
 }
 
 static void __multiorder_split2(int old_order, int new_order)
-- 
cgit v1.2.3


From c0cdbf819cd76d977486b4634ea1b4ede2434413 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Sun, 29 Jan 2017 02:27:24 -0500
Subject: radix tree test suite: Run iteration tests for longer

If the -l flag is set, run the tests for 100 seconds each instead of
the normal 10 seconds.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
Reviewed-by: Rehas Sachdeva <aquannie@gmail.com>
---
 tools/testing/radix-tree/main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c
index 86de448b699e..b829127d5670 100644
--- a/tools/testing/radix-tree/main.c
+++ b/tools/testing/radix-tree/main.c
@@ -365,8 +365,8 @@ int main(int argc, char **argv)
 	regression1_test();
 	regression2_test();
 	regression3_test();
-	iteration_test(0, 10);
-	iteration_test(7, 20);
+	iteration_test(0, 10 + 90 * long_run);
+	iteration_test(7, 10 + 90 * long_run);
 	single_thread_tests(long_run);
 
 	/* Free any remaining preallocated nodes */
-- 
cgit v1.2.3


From c6ce3e2fe3dacda5e8afb0036c814ae9c3fee9b9 Mon Sep 17 00:00:00 2001
From: Rehas Sachdeva <aquannie@gmail.com>
Date: Mon, 13 Feb 2017 16:16:03 -0500
Subject: radix tree test suite: Add config option for map shift

Add config option "SHIFT=<value>" to Makefile for building test suite
with any value of RADIX_TREE_MAP_SHIFT between 3 and 7 inclusive.

Signed-off-by: Rehas Sachdeva <aquannie@gmail.com>
[mawilcox@microsoft.com: .gitignore, quieten grep, remove on clean]
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 tools/testing/radix-tree/.gitignore         |  1 +
 tools/testing/radix-tree/Makefile           | 18 +++++++++++++-----
 tools/testing/radix-tree/linux/kernel.h     |  6 ------
 tools/testing/radix-tree/linux/radix-tree.h |  2 ++
 4 files changed, 16 insertions(+), 11 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/radix-tree/.gitignore b/tools/testing/radix-tree/.gitignore
index 26dedaf57aab..d4706c0ffceb 100644
--- a/tools/testing/radix-tree/.gitignore
+++ b/tools/testing/radix-tree/.gitignore
@@ -1,3 +1,4 @@
+generated/map-shift.h
 idr.c
 idr-test
 main
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index ecea846e7660..f11315bedefc 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -6,11 +6,11 @@ CORE_OFILES := radix-tree.o idr.o linux.o test.o find_bit.o
 OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \
 	 tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o
 
-ifdef BENCHMARK
-	CFLAGS += -DBENCHMARK=1
+ifndef SHIFT
+	SHIFT=3
 endif
 
-targets: $(TARGETS)
+targets: mapshift $(TARGETS)
 
 main:	$(OFILES)
 	$(CC) $(CFLAGS) $(LDFLAGS) $^ -o main
@@ -22,11 +22,11 @@ multiorder: multiorder.o $(CORE_OFILES)
 	$(CC) $(CFLAGS) $(LDFLAGS) $^ -o multiorder
 
 clean:
-	$(RM) $(TARGETS) *.o radix-tree.c idr.c
+	$(RM) $(TARGETS) *.o radix-tree.c idr.c generated/map-shift.h
 
 vpath %.c ../../lib
 
-$(OFILES): *.h */*.h \
+$(OFILES): *.h */*.h generated/map-shift.h \
 	../../include/linux/*.h \
 	../../include/asm/*.h \
 	../../../include/linux/radix-tree.h \
@@ -37,3 +37,11 @@ radix-tree.c: ../../../lib/radix-tree.c
 
 idr.c: ../../../lib/idr.c
 	sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@
+
+.PHONY: mapshift
+
+mapshift:
+	@if ! grep -qw $(SHIFT) generated/map-shift.h; then		\
+		echo "#define RADIX_TREE_MAP_SHIFT $(SHIFT)" >		\
+				generated/map-shift.h;			\
+	fi
diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h
index 677b8c0f60f9..b21a77fddcf7 100644
--- a/tools/testing/radix-tree/linux/kernel.h
+++ b/tools/testing/radix-tree/linux/kernel.h
@@ -12,12 +12,6 @@
 #include <linux/log2.h>
 #include "../../../include/linux/kconfig.h"
 
-#ifdef BENCHMARK
-#define RADIX_TREE_MAP_SHIFT	6
-#else
-#define RADIX_TREE_MAP_SHIFT	3
-#endif
-
 #define printk printf
 #define pr_debug printk
 #define pr_cont printk
diff --git a/tools/testing/radix-tree/linux/radix-tree.h b/tools/testing/radix-tree/linux/radix-tree.h
index ddd135fa3af7..bf1bb231f9b5 100644
--- a/tools/testing/radix-tree/linux/radix-tree.h
+++ b/tools/testing/radix-tree/linux/radix-tree.h
@@ -1,5 +1,7 @@
 #ifndef _TEST_RADIX_TREE_H
 #define _TEST_RADIX_TREE_H
+
+#include "generated/map-shift.h"
 #include "../../../../include/linux/radix-tree.h"
 
 extern int kmalloc_verbose;
-- 
cgit v1.2.3


From d83c3ba0b9263592d1314df924b6c568f7683d29 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Thu, 9 Feb 2017 19:56:26 +1100
Subject: selftests: Fix selftests build to just build, not run tests

In commit 88baa78d1f31 ("selftests: remove duplicated all and clean
target"), the "all" target was removed from individual Makefiles and
added to lib.mk.

However the "all" target was added to lib.mk *after* the existing
"runtests" target. This means "runtests" becomes the first (default)
target for most of our Makefiles.

This has the effect of causing a plain "make" to build *and run* the
tests. Which is at best rude, but depending on which tests are run could
oops someone's build machine.

  $ make -C tools/testing/selftests/
  ...
  make[1]: Entering directory 'tools/testing/selftests/bpf'
  gcc -Wall -O2 -I../../../../usr/include   test_verifier.c -o tools/testing/selftests/bpf/test_verifier
  gcc -Wall -O2 -I../../../../usr/include   test_maps.c -o tools/testing/selftests/bpf/test_maps
  gcc -Wall -O2 -I../../../../usr/include   test_lru_map.c -o tools/testing/selftests/bpf/test_lru_map
  #0 add+sub+mul FAIL
  Failed to load prog 'Function not implemented'!
  #1 unreachable FAIL
  Unexpected error message!
  #2 unreachable2 FAIL
  ...

Fix it by moving the "all" target to the start of lib.mk, making it the
default target.

Fixes: 88baa78d1f31 ("selftests: remove duplicated all and clean target")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Tested by: Bamvor Jian Zhang <bamvor.zhangjian@linaro.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/lib.mk | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 01bb7782a35e..17ed4bbe3963 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -2,6 +2,11 @@
 # Makefile can operate with or without the kbuild infrastructure.
 CC := $(CROSS_COMPILE)gcc
 
+TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS))
+TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES))
+
+all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
+
 define RUN_TESTS
 	@for TEST in $(TEST_GEN_PROGS) $(TEST_PROGS); do \
 		BASENAME_TEST=`basename $$TEST`;	\
@@ -42,11 +47,6 @@ endef
 emit_tests:
 	$(EMIT_TESTS)
 
-TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS))
-TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES))
-
-all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
-
 clean:
 	$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
 
-- 
cgit v1.2.3


From 2047f1d8ba287d208272b6e26360d1a53f7ebf7d Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Thu, 9 Feb 2017 19:56:27 +1100
Subject: selftests: Fix the .c linking rule

Currently we can't build some tests, for example:

  $ make -C tools/testing/selftests/ TARGETS=vm
  ...
  gcc -Wall -I ../../../../usr/include   -lrt -lpthread ../../../../usr/include/linux/kernel.h userfaultfd.c -o tools/testing/selftests/vm/userfaultfd
  /tmp/ccmOkQSM.o: In function `stress':
  userfaultfd.c:(.text+0xc60): undefined reference to `pthread_create'
  userfaultfd.c:(.text+0xca5): undefined reference to `pthread_create'
  userfaultfd.c:(.text+0xcee): undefined reference to `pthread_create'
  userfaultfd.c:(.text+0xd30): undefined reference to `pthread_create'
  userfaultfd.c:(.text+0xd77): undefined reference to `pthread_join'
  userfaultfd.c:(.text+0xe7d): undefined reference to `pthread_join'
  userfaultfd.c:(.text+0xe9f): undefined reference to `pthread_cancel'
  userfaultfd.c:(.text+0xec6): undefined reference to `pthread_join'
  userfaultfd.c:(.text+0xf14): undefined reference to `pthread_join'
  /tmp/ccmOkQSM.o: In function `userfaultfd_stress':
  userfaultfd.c:(.text+0x13e2): undefined reference to `pthread_attr_setstacksize'
  collect2: error: ld returned 1 exit status

This is because the rule for linking .c files to binaries is incorrect.

The first bug is that it uses $< (first prerequisite) instead of $^ (all
preqrequisites), fix it by using ^$.

Secondly the ordering of the prerequisites vs $(LDLIBS) is wrong,
meaning on toolchains that use --as-needed we fail to link (as above).
Fix that by placing $(LDLIBS) *after* ^$.

Finally switch to using the default rule $(LINK.c), so that we get
$(CPPFLAGS) etc. included.

Fixes: a8ba798bc8ec ("selftests: enable O and KBUILD_OUTPUT")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Tested by: Bamvor Jian Zhang <bamvor.zhangjian@linaro.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/lib.mk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 17ed4bbe3963..98841c54763a 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -51,7 +51,7 @@ clean:
 	$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
 
 $(OUTPUT)/%:%.c
-	$(CC) $(CFLAGS) $(LDFLAGS) $(LDLIBS) $< -o $@
+	$(LINK.c) $^ $(LDLIBS) -o $@
 
 $(OUTPUT)/%.o:%.S
 	$(CC) $(ASFLAGS) -c $< -o $@
-- 
cgit v1.2.3


From 634ce97cdfa94a0c5444489b656a662fcc344536 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Thu, 9 Feb 2017 19:56:28 +1100
Subject: selftests: Fix the .S and .S -> .o rules

Both these rules incorrectly use $< (first prerequisite) rather than
$^ (all prerequisites), meaning they don't work if we're using more than
one .S file as input. Switch them to using $^.

They also don't include $(CPPFLAGS) and other variables used in the
default rules, which breaks targets that require those. Fix that by
using the builtin $(COMPILE.S) and $(LINK.S) rules.

Fixes: a8ba798bc8ec ("selftests: enable O and KBUILD_OUTPUT")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Tested by: Bamvor Jian Zhang <bamvor.zhangjian@linaro.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/lib.mk | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 98841c54763a..ce96d80ad64f 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -54,9 +54,9 @@ $(OUTPUT)/%:%.c
 	$(LINK.c) $^ $(LDLIBS) -o $@
 
 $(OUTPUT)/%.o:%.S
-	$(CC) $(ASFLAGS) -c $< -o $@
+	$(COMPILE.S) $^ -o $@
 
 $(OUTPUT)/%:%.S
-	$(CC) $(ASFLAGS) $< -o $@
+	$(LINK.S) $^ $(LDLIBS) -o $@
 
 .PHONY: run_tests all clean install emit_tests
-- 
cgit v1.2.3


From 2e8ec87dad68059876c6079e27912d819061bb4e Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Thu, 9 Feb 2017 19:56:29 +1100
Subject: selftests/powerpc: Fix the clean rule since recent changes

The clean rule is broken for the powerpc tests:

  make[1]: Entering directory 'tools/testing/selftests/powerpc'
  Makefile:63: warning: overriding recipe for target 'clean'
  ../lib.mk:51: warning: ignoring old recipe for target 'clean'
  /bin/sh: 3: Syntax error: end of file unexpected (expecting "done")
  Makefile:63: recipe for target 'clean' failed

Fixes: a8ba798bc8ec ("selftests: enable O and KBUILD_OUTPUT")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/powerpc/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
index 1d48c0cab596..1c5d0575802e 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -62,7 +62,8 @@ endef
 clean:
 	@for TARGET in $(SUB_DIRS); do \
 		BUILD_TARGET=$$OUTPUT/$$TARGET;	\
-		$(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\ done;
+		$(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean; \
+	done;
 	rm -f tags
 
 tags:
-- 
cgit v1.2.3


From 68bd42d97c30d51217b8c3b2b22fede577d8371c Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Thu, 9 Feb 2017 19:56:30 +1100
Subject: selftests/powerpc: Fix remaining fallout from recent changes

In benchmarks we need to use $(TEST_GEN_PROGS) after we include lib.mk,
because lib.mk does the substitution to add $(OUTPUT).

In math the vmx and fpu names were typoed so they no longer matched
correctly, put back the 'v' and 'f'.

In tm we need to substitute $(OUTPUT) into SIGNAL_CONTEXT_CHK_TESTS so
that the rule matches.

In pmu there is an extraneous ':' on the end of $$BUILD_TARGET for the
clean and install rules, which breaks the logic in the child Makefiles.

Fixes: a8ba798bc8ec ("selftests: enable O and KBUILD_OUTPUT")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/powerpc/benchmarks/Makefile |  4 ++--
 tools/testing/selftests/powerpc/math/Makefile       | 16 ++++++++--------
 tools/testing/selftests/powerpc/pmu/Makefile        |  4 ++--
 tools/testing/selftests/powerpc/tm/Makefile         |  1 +
 4 files changed, 13 insertions(+), 12 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/powerpc/benchmarks/Makefile b/tools/testing/selftests/powerpc/benchmarks/Makefile
index 286c6ed2378c..fb96a89bd953 100644
--- a/tools/testing/selftests/powerpc/benchmarks/Makefile
+++ b/tools/testing/selftests/powerpc/benchmarks/Makefile
@@ -2,10 +2,10 @@ TEST_GEN_PROGS := gettimeofday context_switch mmap_bench futex_bench null_syscal
 
 CFLAGS += -O2
 
-$(TEST_GEN_PROGS): ../harness.c
-
 include ../../lib.mk
 
+$(TEST_GEN_PROGS): ../harness.c
+
 $(OUTPUT)/context_switch: ../utils.c
 $(OUTPUT)/context_switch: CFLAGS += -maltivec -mvsx -mabi=altivec
 $(OUTPUT)/context_switch: LDLIBS += -lpthread
diff --git a/tools/testing/selftests/powerpc/math/Makefile b/tools/testing/selftests/powerpc/math/Makefile
index da9f42feaaac..fa8bae920c91 100644
--- a/tools/testing/selftests/powerpc/math/Makefile
+++ b/tools/testing/selftests/powerpc/math/Makefile
@@ -5,13 +5,13 @@ include ../../lib.mk
 $(TEST_GEN_PROGS): ../harness.c
 $(TEST_GEN_PROGS): CFLAGS += -O2 -g -pthread -m64 -maltivec
 
-$(OUTPUT)/pu_syscall: fpu_asm.S
-$(OUTPUT)/pu_preempt: fpu_asm.S
-$(OUTPUT)/pu_signal:  fpu_asm.S
+$(OUTPUT)/fpu_syscall: fpu_asm.S
+$(OUTPUT)/fpu_preempt: fpu_asm.S
+$(OUTPUT)/fpu_signal:  fpu_asm.S
 
-$(OUTPUT)/mx_syscall: vmx_asm.S
-$(OUTPUT)/mx_preempt: vmx_asm.S
-$(OUTPUT)/mx_signal: vmx_asm.S
+$(OUTPUT)/vmx_syscall: vmx_asm.S
+$(OUTPUT)/vmx_preempt: vmx_asm.S
+$(OUTPUT)/vmx_signal: vmx_asm.S
 
-vsx_preempt: CFLAGS += -mvsx
-vsx_preempt: vsx_asm.S
+$(OUTPUT)/vsx_preempt: CFLAGS += -mvsx
+$(OUTPUT)/vsx_preempt: vsx_asm.S
diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile
index 097b08acd867..e4e55d1d3e0f 100644
--- a/tools/testing/selftests/powerpc/pmu/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/Makefile
@@ -31,12 +31,12 @@ endef
 DEFAULT_INSTALL_RULE := $(INSTALL_RULE)
 override define INSTALL_RULE
 	$(DEFAULT_INSTALL_RULE)
-	TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET: -C $$TARGET install
+	TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install
 endef
 
 clean:
 	$(RM) $(TEST_GEN_PROGS) $(OUTPUT)/loop.o
-	TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET: -C $$TARGET clean
+	TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean
 
 ebb:
 	TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all
diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile
index 07da21769ff8..5576ee6a51f2 100644
--- a/tools/testing/selftests/powerpc/tm/Makefile
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -14,5 +14,6 @@ $(OUTPUT)/tm-syscall: tm-syscall-asm.S
 $(OUTPUT)/tm-syscall: CFLAGS += -I../../../../../usr/include
 $(OUTPUT)/tm-tmspr: CFLAGS += -pthread
 
+SIGNAL_CONTEXT_CHK_TESTS := $(patsubst %,$(OUTPUT)/%,$(SIGNAL_CONTEXT_CHK_TESTS))
 $(SIGNAL_CONTEXT_CHK_TESTS): tm-signal.S
 $(SIGNAL_CONTEXT_CHK_TESTS): CFLAGS += -mhtm -m64 -mvsx
-- 
cgit v1.2.3


From d498f8719a098a5df7c6dba4ea302df7afb51efd Mon Sep 17 00:00:00 2001
From: Mickaël Salaün <mic@digikod.net>
Date: Sat, 11 Feb 2017 23:20:23 +0100
Subject: bpf: Rebuild bpf.o for any dependency update
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is needed to force a rebuild of bpf.o when one of its dependencies
(e.g. uapi/linux/bpf.h) is updated.

Add a phony target.

Signed-off-by: Mickaël Salaün <mic@digikod.net>
Cc: Alexei Starovoitov <ast@fb.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David S. Miller <davem@davemloft.net>
Cc: Wang Nan <wangnan0@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/Makefile | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index a35f564f66a1..c7816fe60feb 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -1,13 +1,24 @@
-CFLAGS += -Wall -O2 -lcap -I../../../include/uapi -I../../../lib
+LIBDIR := ../../../lib
+BPFOBJ := $(LIBDIR)/bpf/bpf.o
+
+CFLAGS += -Wall -O2 -lcap -I../../../include/uapi -I$(LIBDIR)
 
 test_objs = test_verifier test_tag test_maps test_lru_map test_lpm_map
 
 TEST_PROGS := $(test_objs) test_kmod.sh
 TEST_FILES := $(test_objs)
 
+.PHONY: all clean force
+
 all: $(test_objs)
 
-$(test_objs): ../../../lib/bpf/bpf.o
+# force a rebuild of BPFOBJ when its dependencies are updated
+force:
+
+$(BPFOBJ): force
+	$(MAKE) -C $(dir $(BPFOBJ))
+
+$(test_objs): $(BPFOBJ)
 
 include ../lib.mk
 
-- 
cgit v1.2.3


From b8826e506ee58873725ec3a25a2a27fefd762574 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 18 Feb 2017 22:07:24 -0500
Subject: selftest for default_file_splice_read() infoleak

bug fixed in commit b9dc6f65bc5e ("fix a fencepost error in pipe_advance()")

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 tools/testing/selftests/Makefile                           | 1 +
 tools/testing/selftests/splice/Makefile                    | 8 ++++++++
 tools/testing/selftests/splice/default_file_splice_read.c  | 8 ++++++++
 tools/testing/selftests/splice/default_file_splice_read.sh | 7 +++++++
 4 files changed, 24 insertions(+)
 create mode 100644 tools/testing/selftests/splice/Makefile
 create mode 100644 tools/testing/selftests/splice/default_file_splice_read.c
 create mode 100755 tools/testing/selftests/splice/default_file_splice_read.sh

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 71b05891a6a1..0e72f1d03c9e 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -24,6 +24,7 @@ TARGETS += ptrace
 TARGETS += seccomp
 TARGETS += sigaltstack
 TARGETS += size
+TARGETS += splice
 TARGETS += static_keys
 TARGETS += sync
 TARGETS += sysctl
diff --git a/tools/testing/selftests/splice/Makefile b/tools/testing/selftests/splice/Makefile
new file mode 100644
index 000000000000..de51f439d4a6
--- /dev/null
+++ b/tools/testing/selftests/splice/Makefile
@@ -0,0 +1,8 @@
+TEST_PROGS := default_file_splice_read.sh
+EXTRA := default_file_splice_read
+all: $(TEST_PROGS) $(EXTRA)
+
+include ../lib.mk
+
+clean:
+	rm -fr $(TEST_PROGS) $(EXTRA)
diff --git a/tools/testing/selftests/splice/default_file_splice_read.c b/tools/testing/selftests/splice/default_file_splice_read.c
new file mode 100644
index 000000000000..01dd6091554c
--- /dev/null
+++ b/tools/testing/selftests/splice/default_file_splice_read.c
@@ -0,0 +1,8 @@
+#define _GNU_SOURCE
+#include <fcntl.h>
+
+int main(int argc, char **argv)
+{
+        splice(0, 0, 1, 0, 1<<30, 0);
+	return 0;
+}
diff --git a/tools/testing/selftests/splice/default_file_splice_read.sh b/tools/testing/selftests/splice/default_file_splice_read.sh
new file mode 100755
index 000000000000..1ea2adeabc94
--- /dev/null
+++ b/tools/testing/selftests/splice/default_file_splice_read.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+n=`./default_file_splice_read </dev/null | wc -c`
+
+test "$n" = 0 && exit 0
+
+echo "default_file_splice_read broken: leaked $n"
+exit 1
-- 
cgit v1.2.3


From 9903bd7b73ef0dec429e951009b36643eb0fe239 Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mike.kravetz@oracle.com>
Date: Wed, 22 Feb 2017 15:43:07 -0800
Subject: userfaultfd: hugetlbfs: add userfaultfd_hugetlb test

Test userfaultfd hugetlb functionality by using the existing testing
method (in userfaultfd.c).  Instead of an anonymous memeory, a hugetlbfs
file is mmap'ed private.  In this way fallocate hole punch can be used
to release pages.  This is because madvise(MADV_DONTNEED) is not
supported for huge pages.

Use the same file, but create wrappers for allocating ranges and
releasing pages.  Compile userfaultfd.c with HUGETLB_TEST defined to
produce an executable to test userfaultfd hugetlb functionality.

Link: http://lkml.kernel.org/r/20161216144821.5183-23-aarcange@redhat.com
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Michael Rapoport <RAPOPORT@il.ibm.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/Makefile      |   4 +
 tools/testing/selftests/vm/run_vmtests   |  13 +++
 tools/testing/selftests/vm/userfaultfd.c | 161 +++++++++++++++++++++++++++----
 3 files changed, 161 insertions(+), 17 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index bbab7f4664ac..0114aac78e1f 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -10,6 +10,7 @@ BINARIES += on-fault-limit
 BINARIES += thuge-gen
 BINARIES += transhuge-stress
 BINARIES += userfaultfd
+BINARIES += userfaultfd_hugetlb
 BINARIES += mlock-random-test
 
 all: $(BINARIES)
@@ -18,6 +19,9 @@ all: $(BINARIES)
 userfaultfd: userfaultfd.c ../../../../usr/include/linux/kernel.h
 	$(CC) $(CFLAGS) -O2 -o $@ $< -lpthread
 
+userfaultfd_hugetlb: userfaultfd.c ../../../../usr/include/linux/kernel.h
+	$(CC) $(CFLAGS) -DHUGETLB_TEST -O2 -o $@ $< -lpthread
+
 mlock-random-test: mlock-random-test.c
 	$(CC) $(CFLAGS) -o $@ $< -lcap
 
diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests
index e11968b3677e..14d697ee00c4 100755
--- a/tools/testing/selftests/vm/run_vmtests
+++ b/tools/testing/selftests/vm/run_vmtests
@@ -103,6 +103,19 @@ else
 	echo "[PASS]"
 fi
 
+echo "----------------------------"
+echo "running userfaultfd_hugetlb"
+echo "----------------------------"
+# 258MB total huge pages == 128MB src and 128MB dst
+./userfaultfd_hugetlb 128 32 $mnt/ufd_test_file
+if [ $? -ne 0 ]; then
+	echo "[FAIL]"
+	exitcode=1
+else
+	echo "[PASS]"
+fi
+rm -f $mnt/ufd_test_file
+
 #cleanup
 umount $mnt
 rm -rf $mnt
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index d77ed41b2094..3011711212ca 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -76,6 +76,10 @@ static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size;
 #define BOUNCE_POLL		(1<<3)
 static int bounces;
 
+#ifdef HUGETLB_TEST
+static int huge_fd;
+static char *huge_fd_off0;
+#endif
 static unsigned long long *count_verify;
 static int uffd, finished, *pipefd;
 static char *area_src, *area_dst;
@@ -97,6 +101,69 @@ pthread_attr_t attr;
 				 ~(unsigned long)(sizeof(unsigned long long) \
 						  -  1)))
 
+#ifndef HUGETLB_TEST
+
+#define EXPECTED_IOCTLS		((1 << _UFFDIO_WAKE) | \
+				 (1 << _UFFDIO_COPY) | \
+				 (1 << _UFFDIO_ZEROPAGE))
+
+static int release_pages(char *rel_area)
+{
+	int ret = 0;
+
+	if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED)) {
+		perror("madvise");
+		ret = 1;
+	}
+
+	return ret;
+}
+
+static void allocate_area(void **alloc_area)
+{
+	if (posix_memalign(alloc_area, page_size, nr_pages * page_size)) {
+		fprintf(stderr, "out of memory\n");
+		*alloc_area = NULL;
+	}
+}
+
+#else /* HUGETLB_TEST */
+
+#define EXPECTED_IOCTLS		UFFD_API_RANGE_IOCTLS_HPAGE
+
+static int release_pages(char *rel_area)
+{
+	int ret = 0;
+
+	if (fallocate(huge_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+				rel_area == huge_fd_off0 ? 0 :
+				nr_pages * page_size,
+				nr_pages * page_size)) {
+		perror("fallocate");
+		ret = 1;
+	}
+
+	return ret;
+}
+
+
+static void allocate_area(void **alloc_area)
+{
+	*alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
+				MAP_PRIVATE | MAP_HUGETLB, huge_fd,
+				*alloc_area == area_src ? 0 :
+				nr_pages * page_size);
+	if (*alloc_area == MAP_FAILED) {
+		fprintf(stderr, "mmap of hugetlbfs file failed\n");
+		*alloc_area = NULL;
+	}
+
+	if (*alloc_area == area_src)
+		huge_fd_off0 = *alloc_area;
+}
+
+#endif /* HUGETLB_TEST */
+
 static int my_bcmp(char *str1, char *str2, size_t n)
 {
 	unsigned long i;
@@ -384,10 +451,8 @@ static int stress(unsigned long *userfaults)
 	 * UFFDIO_COPY without writing zero pages into area_dst
 	 * because the background threads already completed).
 	 */
-	if (madvise(area_src, nr_pages * page_size, MADV_DONTNEED)) {
-		perror("madvise");
+	if (release_pages(area_src))
 		return 1;
-	}
 
 	for (cpu = 0; cpu < nr_cpus; cpu++) {
 		char c;
@@ -425,16 +490,12 @@ static int userfaultfd_stress(void)
 	int uffd_flags, err;
 	unsigned long userfaults[nr_cpus];
 
-	if (posix_memalign(&area, page_size, nr_pages * page_size)) {
-		fprintf(stderr, "out of memory\n");
+	allocate_area((void **)&area_src);
+	if (!area_src)
 		return 1;
-	}
-	area_src = area;
-	if (posix_memalign(&area, page_size, nr_pages * page_size)) {
-		fprintf(stderr, "out of memory\n");
+	allocate_area((void **)&area_dst);
+	if (!area_dst)
 		return 1;
-	}
-	area_dst = area;
 
 	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
 	if (uffd < 0) {
@@ -528,9 +589,7 @@ static int userfaultfd_stress(void)
 			fprintf(stderr, "register failure\n");
 			return 1;
 		}
-		expected_ioctls = (1 << _UFFDIO_WAKE) |
-				  (1 << _UFFDIO_COPY) |
-				  (1 << _UFFDIO_ZEROPAGE);
+		expected_ioctls = EXPECTED_IOCTLS;
 		if ((uffdio_register.ioctls & expected_ioctls) !=
 		    expected_ioctls) {
 			fprintf(stderr,
@@ -562,10 +621,8 @@ static int userfaultfd_stress(void)
 		 * MADV_DONTNEED only after the UFFDIO_REGISTER, so it's
 		 * required to MADV_DONTNEED here.
 		 */
-		if (madvise(area_dst, nr_pages * page_size, MADV_DONTNEED)) {
-			perror("madvise 2");
+		if (release_pages(area_dst))
 			return 1;
-		}
 
 		/* bounce pass */
 		if (stress(userfaults))
@@ -606,6 +663,8 @@ static int userfaultfd_stress(void)
 	return err;
 }
 
+#ifndef HUGETLB_TEST
+
 int main(int argc, char **argv)
 {
 	if (argc < 3)
@@ -632,6 +691,74 @@ int main(int argc, char **argv)
 	return userfaultfd_stress();
 }
 
+#else /* HUGETLB_TEST */
+
+/*
+ * Copied from mlock2-tests.c
+ */
+unsigned long default_huge_page_size(void)
+{
+	unsigned long hps = 0;
+	char *line = NULL;
+	size_t linelen = 0;
+	FILE *f = fopen("/proc/meminfo", "r");
+
+	if (!f)
+		return 0;
+	while (getline(&line, &linelen, f) > 0) {
+		if (sscanf(line, "Hugepagesize:       %lu kB", &hps) == 1) {
+			hps <<= 10;
+			break;
+		}
+	}
+
+	free(line);
+	fclose(f);
+	return hps;
+}
+
+int main(int argc, char **argv)
+{
+	if (argc < 4)
+		fprintf(stderr, "Usage: <MiB> <bounces> <hugetlbfs_file>\n"),
+				exit(1);
+	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+	page_size = default_huge_page_size();
+	if (!page_size)
+		fprintf(stderr, "Unable to determine huge page size\n"),
+				exit(2);
+	if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2
+	    > page_size)
+		fprintf(stderr, "Impossible to run this test\n"), exit(2);
+	nr_pages_per_cpu = atol(argv[1]) * 1024*1024 / page_size /
+		nr_cpus;
+	if (!nr_pages_per_cpu) {
+		fprintf(stderr, "invalid MiB\n");
+		fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1);
+	}
+	bounces = atoi(argv[2]);
+	if (bounces <= 0) {
+		fprintf(stderr, "invalid bounces\n");
+		fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1);
+	}
+	nr_pages = nr_pages_per_cpu * nr_cpus;
+	huge_fd = open(argv[3], O_CREAT | O_RDWR, 0755);
+	if (huge_fd < 0) {
+		fprintf(stderr, "Open of %s failed", argv[3]);
+		perror("open");
+		exit(1);
+	}
+	if (ftruncate(huge_fd, 0)) {
+		fprintf(stderr, "ftruncate %s to size 0 failed", argv[3]);
+		perror("ftruncate");
+		exit(1);
+	}
+	printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n",
+	       nr_pages, nr_pages_per_cpu);
+	return userfaultfd_stress();
+}
+
+#endif
 #else /* __NR_userfaultfd */
 
 #warning "missing __NR_userfaultfd definition"
-- 
cgit v1.2.3


From cac673292b9b39493bb0ff526b96c83ace6fdcd0 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Wed, 22 Feb 2017 15:43:40 -0800
Subject: userfaultfd: shmem: allow registration of shared memory ranges

Expand the userfaultfd_register/unregister routines to allow shared
memory VMAs.

Currently, there is no UFFDIO_ZEROPAGE and write-protection support for
shared memory VMAs, which is reflected in ioctl methods supported by
uffdio_register.

Link: http://lkml.kernel.org/r/20161216144821.5183-34-aarcange@redhat.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Michael Rapoport <RAPOPORT@il.ibm.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/userfaultfd.c                         | 21 +++++++--------------
 include/uapi/linux/userfaultfd.h         |  2 +-
 tools/testing/selftests/vm/userfaultfd.c |  2 +-
 3 files changed, 9 insertions(+), 16 deletions(-)

(limited to 'tools/testing')

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 593135c296bc..18406158e13f 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1098,7 +1098,8 @@ static __always_inline int validate_range(struct mm_struct *mm,
 
 static inline bool vma_can_userfault(struct vm_area_struct *vma)
 {
-	return vma_is_anonymous(vma) || is_vm_hugetlb_page(vma);
+	return vma_is_anonymous(vma) || is_vm_hugetlb_page(vma) ||
+		vma_is_shmem(vma);
 }
 
 static int userfaultfd_register(struct userfaultfd_ctx *ctx,
@@ -1111,7 +1112,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 	struct uffdio_register __user *user_uffdio_register;
 	unsigned long vm_flags, new_flags;
 	bool found;
-	bool huge_pages;
+	bool non_anon_pages;
 	unsigned long start, end, vma_end;
 
 	user_uffdio_register = (struct uffdio_register __user *) arg;
@@ -1175,13 +1176,9 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 
 	/*
 	 * Search for not compatible vmas.
-	 *
-	 * FIXME: this shall be relaxed later so that it doesn't fail
-	 * on tmpfs backed vmas (in addition to the current allowance
-	 * on anonymous vmas).
 	 */
 	found = false;
-	huge_pages = false;
+	non_anon_pages = false;
 	for (cur = vma; cur && cur->vm_start < end; cur = cur->vm_next) {
 		cond_resched();
 
@@ -1220,8 +1217,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 		/*
 		 * Note vmas containing huge pages
 		 */
-		if (is_vm_hugetlb_page(cur))
-			huge_pages = true;
+		if (is_vm_hugetlb_page(cur) || vma_is_shmem(cur))
+			non_anon_pages = true;
 
 		found = true;
 	}
@@ -1292,7 +1289,7 @@ out_unlock:
 		 * userland which ioctls methods are guaranteed to
 		 * succeed on this range.
 		 */
-		if (put_user(huge_pages ? UFFD_API_RANGE_IOCTLS_HPAGE :
+		if (put_user(non_anon_pages ? UFFD_API_RANGE_IOCTLS_BASIC :
 			     UFFD_API_RANGE_IOCTLS,
 			     &user_uffdio_register->ioctls))
 			ret = -EFAULT;
@@ -1352,10 +1349,6 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
 
 	/*
 	 * Search for not compatible vmas.
-	 *
-	 * FIXME: this shall be relaxed later so that it doesn't fail
-	 * on tmpfs backed vmas (in addition to the current allowance
-	 * on anonymous vmas).
 	 */
 	found = false;
 	ret = -EINVAL;
diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index 7293321abdfb..10631a4cdb24 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -30,7 +30,7 @@
 	((__u64)1 << _UFFDIO_WAKE |		\
 	 (__u64)1 << _UFFDIO_COPY |		\
 	 (__u64)1 << _UFFDIO_ZEROPAGE)
-#define UFFD_API_RANGE_IOCTLS_HPAGE		\
+#define UFFD_API_RANGE_IOCTLS_BASIC		\
 	((__u64)1 << _UFFDIO_WAKE |		\
 	 (__u64)1 << _UFFDIO_COPY)
 
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 3011711212ca..d753a9161411 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -129,7 +129,7 @@ static void allocate_area(void **alloc_area)
 
 #else /* HUGETLB_TEST */
 
-#define EXPECTED_IOCTLS		UFFD_API_RANGE_IOCTLS_HPAGE
+#define EXPECTED_IOCTLS		UFFD_API_RANGE_IOCTLS_BASIC
 
 static int release_pages(char *rel_area)
 {
-- 
cgit v1.2.3


From 419624daf0e827452837177c4b983dc0f1b6429f Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Wed, 22 Feb 2017 15:43:46 -0800
Subject: userfaultfd: shmem: add userfaultfd_shmem test

The test verifies that anonymous shared mapping can be used with userfault
using the existing testing method.  The shared memory area is allocated
using mmap(..., MAP_SHARED | MAP_ANONYMOUS, ...) and released using
madvise(MADV_REMOVE)

Link: http://lkml.kernel.org/r/20161216144821.5183-35-aarcange@redhat.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Michael Rapoport <RAPOPORT@il.ibm.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/Makefile      |  4 ++++
 tools/testing/selftests/vm/run_vmtests   | 11 ++++++++++
 tools/testing/selftests/vm/userfaultfd.c | 37 ++++++++++++++++++++++++++++++--
 3 files changed, 50 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 0114aac78e1f..900dfaf81051 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -11,6 +11,7 @@ BINARIES += thuge-gen
 BINARIES += transhuge-stress
 BINARIES += userfaultfd
 BINARIES += userfaultfd_hugetlb
+BINARIES += userfaultfd_shmem
 BINARIES += mlock-random-test
 
 all: $(BINARIES)
@@ -22,6 +23,9 @@ userfaultfd: userfaultfd.c ../../../../usr/include/linux/kernel.h
 userfaultfd_hugetlb: userfaultfd.c ../../../../usr/include/linux/kernel.h
 	$(CC) $(CFLAGS) -DHUGETLB_TEST -O2 -o $@ $< -lpthread
 
+userfaultfd_shmem: userfaultfd.c ../../../../usr/include/linux/kernel.h
+	$(CC) $(CFLAGS) -DSHMEM_TEST -O2 -o $@ $< -lpthread
+
 mlock-random-test: mlock-random-test.c
 	$(CC) $(CFLAGS) -o $@ $< -lcap
 
diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests
index 14d697ee00c4..c92f6cf31d0a 100755
--- a/tools/testing/selftests/vm/run_vmtests
+++ b/tools/testing/selftests/vm/run_vmtests
@@ -116,6 +116,17 @@ else
 fi
 rm -f $mnt/ufd_test_file
 
+echo "----------------------------"
+echo "running userfaultfd_shmem"
+echo "----------------------------"
+./userfaultfd_shmem 128 32
+if [ $? -ne 0 ]; then
+	echo "[FAIL]"
+	exitcode=1
+else
+	echo "[PASS]"
+fi
+
 #cleanup
 umount $mnt
 rm -rf $mnt
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index d753a9161411..a5e5808c86cd 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -101,8 +101,9 @@ pthread_attr_t attr;
 				 ~(unsigned long)(sizeof(unsigned long long) \
 						  -  1)))
 
-#ifndef HUGETLB_TEST
+#if !defined(HUGETLB_TEST) && !defined(SHMEM_TEST)
 
+/* Anonymous memory */
 #define EXPECTED_IOCTLS		((1 << _UFFDIO_WAKE) | \
 				 (1 << _UFFDIO_COPY) | \
 				 (1 << _UFFDIO_ZEROPAGE))
@@ -127,10 +128,13 @@ static void allocate_area(void **alloc_area)
 	}
 }
 
-#else /* HUGETLB_TEST */
+#else /* HUGETLB_TEST or SHMEM_TEST */
 
 #define EXPECTED_IOCTLS		UFFD_API_RANGE_IOCTLS_BASIC
 
+#ifdef HUGETLB_TEST
+
+/* HugeTLB memory */
 static int release_pages(char *rel_area)
 {
 	int ret = 0;
@@ -162,8 +166,37 @@ static void allocate_area(void **alloc_area)
 		huge_fd_off0 = *alloc_area;
 }
 
+#elif defined(SHMEM_TEST)
+
+/* Shared memory */
+static int release_pages(char *rel_area)
+{
+	int ret = 0;
+
+	if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE)) {
+		perror("madvise");
+		ret = 1;
+	}
+
+	return ret;
+}
+
+static void allocate_area(void **alloc_area)
+{
+	*alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
+			   MAP_ANONYMOUS | MAP_SHARED, -1, 0);
+	if (*alloc_area == MAP_FAILED) {
+		fprintf(stderr, "shared memory mmap failed\n");
+		*alloc_area = NULL;
+	}
+}
+
+#else /* SHMEM_TEST */
+#error "Undefined test type"
 #endif /* HUGETLB_TEST */
 
+#endif /* !defined(HUGETLB_TEST) && !defined(SHMEM_TEST) */
+
 static int my_bcmp(char *str1, char *str2, size_t n)
 {
 	unsigned long i;
-- 
cgit v1.2.3


From 6228b8f2d15bc9a9b76d6b209a8b760a642fa996 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Wed, 22 Feb 2017 15:44:01 -0800
Subject: userfaultfd: non-cooperative: selftest: introduce userfaultfd_open

userfaultfd_open will be needed by the non cooperative selftest.

Link: http://lkml.kernel.org/r/20161216144821.5183-39-aarcange@redhat.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Michael Rapoport <RAPOPORT@il.ibm.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/userfaultfd.c | 41 +++++++++++++++++++-------------
 1 file changed, 25 insertions(+), 16 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index a5e5808c86cd..75540e770b82 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -81,7 +81,7 @@ static int huge_fd;
 static char *huge_fd_off0;
 #endif
 static unsigned long long *count_verify;
-static int uffd, finished, *pipefd;
+static int uffd, uffd_flags, finished, *pipefd;
 static char *area_src, *area_dst;
 static char *zeropage;
 pthread_attr_t attr;
@@ -512,23 +512,9 @@ static int stress(unsigned long *userfaults)
 	return 0;
 }
 
-static int userfaultfd_stress(void)
+static int userfaultfd_open(void)
 {
-	void *area;
-	char *tmp_area;
-	unsigned long nr;
-	struct uffdio_register uffdio_register;
 	struct uffdio_api uffdio_api;
-	unsigned long cpu;
-	int uffd_flags, err;
-	unsigned long userfaults[nr_cpus];
-
-	allocate_area((void **)&area_src);
-	if (!area_src)
-		return 1;
-	allocate_area((void **)&area_dst);
-	if (!area_dst)
-		return 1;
 
 	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
 	if (uffd < 0) {
@@ -549,6 +535,29 @@ static int userfaultfd_stress(void)
 		return 1;
 	}
 
+	return 0;
+}
+
+static int userfaultfd_stress(void)
+{
+	void *area;
+	char *tmp_area;
+	unsigned long nr;
+	struct uffdio_register uffdio_register;
+	unsigned long cpu;
+	int err;
+	unsigned long userfaults[nr_cpus];
+
+	allocate_area((void **)&area_src);
+	if (!area_src)
+		return 1;
+	allocate_area((void **)&area_dst);
+	if (!area_dst)
+		return 1;
+
+	if (userfaultfd_open() < 0)
+		return 1;
+
 	count_verify = malloc(nr_pages * sizeof(unsigned long long));
 	if (!count_verify) {
 		perror("count_verify");
-- 
cgit v1.2.3


From aa0d27217477acbc1cfcc4fdaa4de4f3ce545b4e Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Wed, 22 Feb 2017 15:44:04 -0800
Subject: userfaultfd: non-cooperative: selftest: add ufd parameter to
 copy_page

With future addition of event tests, copy_page will be called with
different userfault file descriptors

Link: http://lkml.kernel.org/r/20161216144821.5183-40-aarcange@redhat.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Michael Rapoport <RAPOPORT@il.ibm.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/userfaultfd.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 75540e770b82..c79c372db2da 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -317,7 +317,7 @@ static void *locking_thread(void *arg)
 	return NULL;
 }
 
-static int copy_page(unsigned long offset)
+static int copy_page(int ufd, unsigned long offset)
 {
 	struct uffdio_copy uffdio_copy;
 
@@ -329,7 +329,7 @@ static int copy_page(unsigned long offset)
 	uffdio_copy.len = page_size;
 	uffdio_copy.mode = 0;
 	uffdio_copy.copy = 0;
-	if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy)) {
+	if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) {
 		/* real retval in ufdio_copy.copy */
 		if (uffdio_copy.copy != -EEXIST)
 			fprintf(stderr, "UFFDIO_COPY error %Ld\n",
@@ -386,7 +386,7 @@ static void *uffd_poll_thread(void *arg)
 		offset = (char *)(unsigned long)msg.arg.pagefault.address -
 			 area_dst;
 		offset &= ~(page_size-1);
-		if (copy_page(offset))
+		if (copy_page(uffd, offset))
 			userfaults++;
 	}
 	return (void *)userfaults;
@@ -424,7 +424,7 @@ static void *uffd_read_thread(void *arg)
 		offset = (char *)(unsigned long)msg.arg.pagefault.address -
 			 area_dst;
 		offset &= ~(page_size-1);
-		if (copy_page(offset))
+		if (copy_page(uffd, offset))
 			(*this_cpu_userfaults)++;
 	}
 	return (void *)NULL;
@@ -438,7 +438,7 @@ static void *background_thread(void *arg)
 	for (page_nr = cpu * nr_pages_per_cpu;
 	     page_nr < (cpu+1) * nr_pages_per_cpu;
 	     page_nr++)
-		copy_page(page_nr * page_size);
+		copy_page(uffd, page_nr * page_size);
 
 	return NULL;
 }
-- 
cgit v1.2.3


From da5502c0a39b7ba28f403a4b87d1dd690e7829bf Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Wed, 22 Feb 2017 15:44:06 -0800
Subject: userfaultfd: non-cooperative: selftest: add test for FORK,
 MADVDONTNEED and REMAP events

Add test for userfaultfd events used in non-cooperative scenario when
the process that monitors the userfaultfd and handles user faults is not
the same process that causes the page faults.

Link: http://lkml.kernel.org/r/20161216144821.5183-41-aarcange@redhat.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Michael Rapoport <RAPOPORT@il.ibm.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/userfaultfd.c | 175 ++++++++++++++++++++++++++++---
 1 file changed, 163 insertions(+), 12 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index c79c372db2da..71b4d820b011 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -63,6 +63,7 @@
 #include <sys/mman.h>
 #include <sys/syscall.h>
 #include <sys/ioctl.h>
+#include <sys/wait.h>
 #include <pthread.h>
 #include <linux/userfaultfd.h>
 
@@ -347,6 +348,7 @@ static void *uffd_poll_thread(void *arg)
 	unsigned long cpu = (unsigned long) arg;
 	struct pollfd pollfd[2];
 	struct uffd_msg msg;
+	struct uffdio_register uffd_reg;
 	int ret;
 	unsigned long offset;
 	char tmp_chr;
@@ -378,16 +380,35 @@ static void *uffd_poll_thread(void *arg)
 				continue;
 			perror("nonblocking read error"), exit(1);
 		}
-		if (msg.event != UFFD_EVENT_PAGEFAULT)
+		switch (msg.event) {
+		default:
 			fprintf(stderr, "unexpected msg event %u\n",
 				msg.event), exit(1);
-		if (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
-			fprintf(stderr, "unexpected write fault\n"), exit(1);
-		offset = (char *)(unsigned long)msg.arg.pagefault.address -
-			 area_dst;
-		offset &= ~(page_size-1);
-		if (copy_page(uffd, offset))
-			userfaults++;
+			break;
+		case UFFD_EVENT_PAGEFAULT:
+			if (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
+				fprintf(stderr, "unexpected write fault\n"), exit(1);
+			offset = (char *)(unsigned long)msg.arg.pagefault.address -
+				area_dst;
+			offset &= ~(page_size-1);
+			if (copy_page(uffd, offset))
+				userfaults++;
+			break;
+		case UFFD_EVENT_FORK:
+			uffd = msg.arg.fork.ufd;
+			pollfd[0].fd = uffd;
+			break;
+		case UFFD_EVENT_MADVDONTNEED:
+			uffd_reg.range.start = msg.arg.madv_dn.start;
+			uffd_reg.range.len = msg.arg.madv_dn.end -
+				msg.arg.madv_dn.start;
+			if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range))
+				fprintf(stderr, "madv_dn failure\n"), exit(1);
+			break;
+		case UFFD_EVENT_REMAP:
+			area_dst = (char *)(unsigned long)msg.arg.remap.to;
+			break;
+		}
 	}
 	return (void *)userfaults;
 }
@@ -512,7 +533,7 @@ static int stress(unsigned long *userfaults)
 	return 0;
 }
 
-static int userfaultfd_open(void)
+static int userfaultfd_open(int features)
 {
 	struct uffdio_api uffdio_api;
 
@@ -525,7 +546,7 @@ static int userfaultfd_open(void)
 	uffd_flags = fcntl(uffd, F_GETFD, NULL);
 
 	uffdio_api.api = UFFD_API;
-	uffdio_api.features = 0;
+	uffdio_api.features = features;
 	if (ioctl(uffd, UFFDIO_API, &uffdio_api)) {
 		fprintf(stderr, "UFFDIO_API\n");
 		return 1;
@@ -538,6 +559,132 @@ static int userfaultfd_open(void)
 	return 0;
 }
 
+/*
+ * For non-cooperative userfaultfd test we fork() a process that will
+ * generate pagefaults, will mremap the area monitored by the
+ * userfaultfd and at last this process will release the monitored
+ * area.
+ * For the anonymous and shared memory the area is divided into two
+ * parts, the first part is accessed before mremap, and the second
+ * part is accessed after mremap. Since hugetlbfs does not support
+ * mremap, the entire monitored area is accessed in a single pass for
+ * HUGETLB_TEST.
+ * The release of the pages currently generates event only for
+ * anonymous memory (UFFD_EVENT_MADVDONTNEED), hence it is not checked
+ * for hugetlb and shmem.
+ */
+static int faulting_process(void)
+{
+	unsigned long nr;
+	unsigned long long count;
+
+#ifndef HUGETLB_TEST
+	unsigned long split_nr_pages = (nr_pages + 1) / 2;
+#else
+	unsigned long split_nr_pages = nr_pages;
+#endif
+
+	for (nr = 0; nr < split_nr_pages; nr++) {
+		count = *area_count(area_dst, nr);
+		if (count != count_verify[nr]) {
+			fprintf(stderr,
+				"nr %lu memory corruption %Lu %Lu\n",
+				nr, count,
+				count_verify[nr]), exit(1);
+		}
+	}
+
+#ifndef HUGETLB_TEST
+	area_dst = mremap(area_dst, nr_pages * page_size,  nr_pages * page_size,
+			  MREMAP_MAYMOVE | MREMAP_FIXED, area_src);
+	if (area_dst == MAP_FAILED)
+		perror("mremap"), exit(1);
+
+	for (; nr < nr_pages; nr++) {
+		count = *area_count(area_dst, nr);
+		if (count != count_verify[nr]) {
+			fprintf(stderr,
+				"nr %lu memory corruption %Lu %Lu\n",
+				nr, count,
+				count_verify[nr]), exit(1);
+		}
+	}
+
+#ifndef SHMEM_TEST
+	if (release_pages(area_dst))
+		return 1;
+
+	for (nr = 0; nr < nr_pages; nr++) {
+		if (my_bcmp(area_dst + nr * page_size, zeropage, page_size))
+			fprintf(stderr, "nr %lu is not zero\n", nr), exit(1);
+	}
+#endif /* SHMEM_TEST */
+
+#endif /* HUGETLB_TEST */
+
+	return 0;
+}
+
+static int userfaultfd_events_test(void)
+{
+	struct uffdio_register uffdio_register;
+	unsigned long expected_ioctls;
+	unsigned long userfaults;
+	pthread_t uffd_mon;
+	int err, features;
+	pid_t pid;
+	char c;
+
+	printf("testing events (fork, remap, madv_dn): ");
+	fflush(stdout);
+
+	if (release_pages(area_dst))
+		return 1;
+
+	features = UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_EVENT_REMAP |
+		UFFD_FEATURE_EVENT_MADVDONTNEED;
+	if (userfaultfd_open(features) < 0)
+		return 1;
+	fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
+
+	uffdio_register.range.start = (unsigned long) area_dst;
+	uffdio_register.range.len = nr_pages * page_size;
+	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
+		fprintf(stderr, "register failure\n"), exit(1);
+
+	expected_ioctls = EXPECTED_IOCTLS;
+	if ((uffdio_register.ioctls & expected_ioctls) !=
+	    expected_ioctls)
+		fprintf(stderr,
+			"unexpected missing ioctl for anon memory\n"),
+			exit(1);
+
+	if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, NULL))
+		perror("uffd_poll_thread create"), exit(1);
+
+	pid = fork();
+	if (pid < 0)
+		perror("fork"), exit(1);
+
+	if (!pid)
+		return faulting_process();
+
+	waitpid(pid, &err, 0);
+	if (err)
+		fprintf(stderr, "faulting process failed\n"), exit(1);
+
+	if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
+		perror("pipe write"), exit(1);
+	if (pthread_join(uffd_mon, (void **)&userfaults))
+		return 1;
+
+	close(uffd);
+	printf("userfaults: %ld\n", userfaults);
+
+	return userfaults != nr_pages;
+}
+
 static int userfaultfd_stress(void)
 {
 	void *area;
@@ -555,7 +702,7 @@ static int userfaultfd_stress(void)
 	if (!area_dst)
 		return 1;
 
-	if (userfaultfd_open() < 0)
+	if (userfaultfd_open(0) < 0)
 		return 1;
 
 	count_verify = malloc(nr_pages * sizeof(unsigned long long));
@@ -702,7 +849,11 @@ static int userfaultfd_stress(void)
 		printf("\n");
 	}
 
-	return err;
+	if (err)
+		return err;
+
+	close(uffd);
+	return userfaultfd_events_test();
 }
 
 #ifndef HUGETLB_TEST
-- 
cgit v1.2.3


From 7a0c4cf85b856430af62a907dd65dfc51438d24f Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Wed, 22 Feb 2017 15:44:10 -0800
Subject: userfaultfd: selftest: test UFFDIO_ZEROPAGE on all memory types

This will verify -EINVAL is returned with hugetlbfs/shmem and it'll do a
functional test of UFFDIO_ZEROPAGE on anonymous memory.

Link: http://lkml.kernel.org/r/20161216144821.5183-42-aarcange@redhat.com
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Michael Rapoport <RAPOPORT@il.ibm.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/userfaultfd.c | 82 +++++++++++++++++++++++++++++++-
 1 file changed, 81 insertions(+), 1 deletion(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 71b4d820b011..5a840a605a16 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -625,6 +625,86 @@ static int faulting_process(void)
 	return 0;
 }
 
+static int uffdio_zeropage(int ufd, unsigned long offset)
+{
+	struct uffdio_zeropage uffdio_zeropage;
+	int ret;
+	unsigned long has_zeropage = EXPECTED_IOCTLS & (1 << _UFFDIO_ZEROPAGE);
+
+	if (offset >= nr_pages * page_size)
+		fprintf(stderr, "unexpected offset %lu\n",
+			offset), exit(1);
+	uffdio_zeropage.range.start = (unsigned long) area_dst + offset;
+	uffdio_zeropage.range.len = page_size;
+	uffdio_zeropage.mode = 0;
+	ret = ioctl(ufd, UFFDIO_ZEROPAGE, &uffdio_zeropage);
+	if (ret) {
+		/* real retval in ufdio_zeropage.zeropage */
+		if (has_zeropage) {
+			if (uffdio_zeropage.zeropage == -EEXIST)
+				fprintf(stderr, "UFFDIO_ZEROPAGE -EEXIST\n"),
+					exit(1);
+			else
+				fprintf(stderr, "UFFDIO_ZEROPAGE error %Ld\n",
+					uffdio_zeropage.zeropage), exit(1);
+		} else {
+			if (uffdio_zeropage.zeropage != -EINVAL)
+				fprintf(stderr,
+					"UFFDIO_ZEROPAGE not -EINVAL %Ld\n",
+					uffdio_zeropage.zeropage), exit(1);
+		}
+	} else if (has_zeropage) {
+		if (uffdio_zeropage.zeropage != page_size) {
+			fprintf(stderr, "UFFDIO_ZEROPAGE unexpected %Ld\n",
+				uffdio_zeropage.zeropage), exit(1);
+		} else
+			return 1;
+	} else {
+		fprintf(stderr,
+			"UFFDIO_ZEROPAGE succeeded %Ld\n",
+			uffdio_zeropage.zeropage), exit(1);
+	}
+
+	return 0;
+}
+
+/* exercise UFFDIO_ZEROPAGE */
+static int userfaultfd_zeropage_test(void)
+{
+	struct uffdio_register uffdio_register;
+	unsigned long expected_ioctls;
+
+	printf("testing UFFDIO_ZEROPAGE: ");
+	fflush(stdout);
+
+	if (release_pages(area_dst))
+		return 1;
+
+	if (userfaultfd_open(0) < 0)
+		return 1;
+	uffdio_register.range.start = (unsigned long) area_dst;
+	uffdio_register.range.len = nr_pages * page_size;
+	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
+		fprintf(stderr, "register failure\n"), exit(1);
+
+	expected_ioctls = EXPECTED_IOCTLS;
+	if ((uffdio_register.ioctls & expected_ioctls) !=
+	    expected_ioctls)
+		fprintf(stderr,
+			"unexpected missing ioctl for anon memory\n"),
+			exit(1);
+
+	if (uffdio_zeropage(uffd, 0)) {
+		if (my_bcmp(area_dst, zeropage, page_size))
+			fprintf(stderr, "zeropage is not zero\n"), exit(1);
+	}
+
+	close(uffd);
+	printf("done.\n");
+	return 0;
+}
+
 static int userfaultfd_events_test(void)
 {
 	struct uffdio_register uffdio_register;
@@ -853,7 +933,7 @@ static int userfaultfd_stress(void)
 		return err;
 
 	close(uffd);
-	return userfaultfd_events_test();
+	return userfaultfd_zeropage_test() || userfaultfd_events_test();
 }
 
 #ifndef HUGETLB_TEST
-- 
cgit v1.2.3


From d811914d87576c562e849c00d9f9beff45038801 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Fri, 24 Feb 2017 14:56:02 -0800
Subject: userfaultfd: non-cooperative: rename *EVENT_MADVDONTNEED to
 *EVENT_REMOVE

Patch series "userfaultfd: non-cooperative: add madvise() event for
MADV_REMOVE request".

These patches add notification of madvise(MADV_REMOVE) event to
non-cooperative userfaultfd monitor.

The first pacth renames EVENT_MADVDONTNEED to EVENT_REMOVE along with
relevant functions and structures.  Using _REMOVE instead of
_MADVDONTNEED describes the event semantics more clearly and I hope it's
not too late for such change in the ABI.

This patch (of 3):

The UFFD_EVENT_MADVDONTNEED purpose is to notify uffd monitor about
removal of certain range from address space tracked by userfaultfd.
Hence, UFFD_EVENT_REMOVE seems to better reflect the operation
semantics.  Respectively, 'madv_dn' field of uffd_msg is renamed to
'remove' and the madvise_userfault_dontneed callback is renamed to
userfaultfd_remove.

Link: http://lkml.kernel.org/r/1484814154-1557-2-git-send-email-rppt@linux.vnet.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/userfaultfd.c                         | 14 +++++++-------
 include/linux/userfaultfd_k.h            | 16 ++++++++--------
 include/uapi/linux/userfaultfd.h         |  8 ++++----
 mm/madvise.c                             |  2 +-
 tools/testing/selftests/vm/userfaultfd.c | 16 ++++++++--------
 5 files changed, 28 insertions(+), 28 deletions(-)

(limited to 'tools/testing')

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 18406158e13f..8fe601b4875e 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -681,16 +681,16 @@ void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *vm_ctx,
 	userfaultfd_event_wait_completion(ctx, &ewq);
 }
 
-void madvise_userfault_dontneed(struct vm_area_struct *vma,
-				struct vm_area_struct **prev,
-				unsigned long start, unsigned long end)
+void userfaultfd_remove(struct vm_area_struct *vma,
+			struct vm_area_struct **prev,
+			unsigned long start, unsigned long end)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct userfaultfd_ctx *ctx;
 	struct userfaultfd_wait_queue ewq;
 
 	ctx = vma->vm_userfaultfd_ctx.ctx;
-	if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_MADVDONTNEED))
+	if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_REMOVE))
 		return;
 
 	userfaultfd_ctx_get(ctx);
@@ -700,9 +700,9 @@ void madvise_userfault_dontneed(struct vm_area_struct *vma,
 
 	msg_init(&ewq.msg);
 
-	ewq.msg.event = UFFD_EVENT_MADVDONTNEED;
-	ewq.msg.arg.madv_dn.start = start;
-	ewq.msg.arg.madv_dn.end = end;
+	ewq.msg.event = UFFD_EVENT_REMOVE;
+	ewq.msg.arg.remove.start = start;
+	ewq.msg.arg.remove.end = end;
 
 	userfaultfd_event_wait_completion(ctx, &ewq);
 
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index f431861f22f1..2521542f6c07 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -61,10 +61,10 @@ extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *,
 					unsigned long from, unsigned long to,
 					unsigned long len);
 
-extern void madvise_userfault_dontneed(struct vm_area_struct *vma,
-				       struct vm_area_struct **prev,
-				       unsigned long start,
-				       unsigned long end);
+extern void userfaultfd_remove(struct vm_area_struct *vma,
+			       struct vm_area_struct **prev,
+			       unsigned long start,
+			       unsigned long end);
 
 #else /* CONFIG_USERFAULTFD */
 
@@ -112,10 +112,10 @@ static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *ctx,
 {
 }
 
-static inline void madvise_userfault_dontneed(struct vm_area_struct *vma,
-					      struct vm_area_struct **prev,
-					      unsigned long start,
-					      unsigned long end)
+static inline void userfaultfd_remove(struct vm_area_struct *vma,
+				      struct vm_area_struct **prev,
+				      unsigned long start,
+				      unsigned long end)
 {
 }
 #endif /* CONFIG_USERFAULTFD */
diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index 9ac4b68c54d1..b742c40c2880 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -20,7 +20,7 @@
 #define UFFD_API ((__u64)0xAA)
 #define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_FORK |		\
 			   UFFD_FEATURE_EVENT_REMAP |		\
-			   UFFD_FEATURE_EVENT_MADVDONTNEED |	\
+			   UFFD_FEATURE_EVENT_REMOVE |	\
 			   UFFD_FEATURE_MISSING_HUGETLBFS |	\
 			   UFFD_FEATURE_MISSING_SHMEM)
 #define UFFD_API_IOCTLS				\
@@ -92,7 +92,7 @@ struct uffd_msg {
 		struct {
 			__u64	start;
 			__u64	end;
-		} madv_dn;
+		} remove;
 
 		struct {
 			/* unused reserved fields */
@@ -109,7 +109,7 @@ struct uffd_msg {
 #define UFFD_EVENT_PAGEFAULT	0x12
 #define UFFD_EVENT_FORK		0x13
 #define UFFD_EVENT_REMAP	0x14
-#define UFFD_EVENT_MADVDONTNEED	0x15
+#define UFFD_EVENT_REMOVE	0x15
 
 /* flags for UFFD_EVENT_PAGEFAULT */
 #define UFFD_PAGEFAULT_FLAG_WRITE	(1<<0)	/* If this was a write fault */
@@ -155,7 +155,7 @@ struct uffdio_api {
 #define UFFD_FEATURE_PAGEFAULT_FLAG_WP		(1<<0)
 #define UFFD_FEATURE_EVENT_FORK			(1<<1)
 #define UFFD_FEATURE_EVENT_REMAP		(1<<2)
-#define UFFD_FEATURE_EVENT_MADVDONTNEED		(1<<3)
+#define UFFD_FEATURE_EVENT_REMOVE		(1<<3)
 #define UFFD_FEATURE_MISSING_HUGETLBFS		(1<<4)
 #define UFFD_FEATURE_MISSING_SHMEM		(1<<5)
 	__u64 features;
diff --git a/mm/madvise.c b/mm/madvise.c
index b530a4986035..ab5ef141cc9b 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -479,7 +479,7 @@ static long madvise_dontneed(struct vm_area_struct *vma,
 	if (!can_madv_dontneed_vma(vma))
 		return -EINVAL;
 
-	madvise_userfault_dontneed(vma, prev, start, end);
+	userfaultfd_remove(vma, prev, start, end);
 	zap_page_range(vma, start, end - start);
 	return 0;
 }
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 5a840a605a16..9eb77df568f7 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -398,12 +398,12 @@ static void *uffd_poll_thread(void *arg)
 			uffd = msg.arg.fork.ufd;
 			pollfd[0].fd = uffd;
 			break;
-		case UFFD_EVENT_MADVDONTNEED:
-			uffd_reg.range.start = msg.arg.madv_dn.start;
-			uffd_reg.range.len = msg.arg.madv_dn.end -
-				msg.arg.madv_dn.start;
+		case UFFD_EVENT_REMOVE:
+			uffd_reg.range.start = msg.arg.remove.start;
+			uffd_reg.range.len = msg.arg.remove.end -
+				msg.arg.remove.start;
 			if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range))
-				fprintf(stderr, "madv_dn failure\n"), exit(1);
+				fprintf(stderr, "remove failure\n"), exit(1);
 			break;
 		case UFFD_EVENT_REMAP:
 			area_dst = (char *)(unsigned long)msg.arg.remap.to;
@@ -570,7 +570,7 @@ static int userfaultfd_open(int features)
  * mremap, the entire monitored area is accessed in a single pass for
  * HUGETLB_TEST.
  * The release of the pages currently generates event only for
- * anonymous memory (UFFD_EVENT_MADVDONTNEED), hence it is not checked
+ * anonymous memory (UFFD_EVENT_REMOVE), hence it is not checked
  * for hugetlb and shmem.
  */
 static int faulting_process(void)
@@ -715,14 +715,14 @@ static int userfaultfd_events_test(void)
 	pid_t pid;
 	char c;
 
-	printf("testing events (fork, remap, madv_dn): ");
+	printf("testing events (fork, remap, remove): ");
 	fflush(stdout);
 
 	if (release_pages(area_dst))
 		return 1;
 
 	features = UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_EVENT_REMAP |
-		UFFD_FEATURE_EVENT_MADVDONTNEED;
+		UFFD_FEATURE_EVENT_REMOVE;
 	if (userfaultfd_open(features) < 0)
 		return 1;
 	fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
-- 
cgit v1.2.3


From 64527f5d540ad496718c7bca5e9387cf6cf94e8c Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Fri, 24 Feb 2017 14:56:08 -0800
Subject: userfaultfd: non-cooperative: selftest: enable REMOVE event test for
 shmem

Now when madvise(MADV_REMOVE) notifies uffd reader, we should verify
that appliciation actually sees zeros at the removed range.

Link: http://lkml.kernel.org/r/1484814154-1557-4-git-send-email-rppt@linux.vnet.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Reviewed-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/userfaultfd.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 9eb77df568f7..e9449c801888 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -569,9 +569,9 @@ static int userfaultfd_open(int features)
  * part is accessed after mremap. Since hugetlbfs does not support
  * mremap, the entire monitored area is accessed in a single pass for
  * HUGETLB_TEST.
- * The release of the pages currently generates event only for
+ * The release of the pages currently generates event for shmem and
  * anonymous memory (UFFD_EVENT_REMOVE), hence it is not checked
- * for hugetlb and shmem.
+ * for hugetlb.
  */
 static int faulting_process(void)
 {
@@ -610,7 +610,6 @@ static int faulting_process(void)
 		}
 	}
 
-#ifndef SHMEM_TEST
 	if (release_pages(area_dst))
 		return 1;
 
@@ -618,7 +617,6 @@ static int faulting_process(void)
 		if (my_bcmp(area_dst + nr * page_size, zeropage, page_size))
 			fprintf(stderr, "nr %lu is not zero\n", nr), exit(1);
 	}
-#endif /* SHMEM_TEST */
 
 #endif /* HUGETLB_TEST */
 
-- 
cgit v1.2.3


From 0c49ad4155122ae7684cc744bc5a181ba28518b5 Mon Sep 17 00:00:00 2001
From: Stas Sergeev <stsp@list.ru>
Date: Mon, 27 Feb 2017 14:27:28 -0800
Subject: tools/testing/selftests/sigaltstack/sas.c: improve output of
 sigaltstack testcase

Currently it uses %i for bitmasks, which makes it difficult to properly
decode the values.  Use %x instead.

Link: http://lkml.kernel.org/r/b7b4c45d-2f21-de6c-d1c8-16c8386da27c@list.ru
Signed-off-by: Stas Sergeev <stsp@users.sourceforge.net>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/sigaltstack/sas.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/sigaltstack/sas.c b/tools/testing/selftests/sigaltstack/sas.c
index 1bb01258e559..ccd07343d418 100644
--- a/tools/testing/selftests/sigaltstack/sas.c
+++ b/tools/testing/selftests/sigaltstack/sas.c
@@ -57,7 +57,7 @@ void my_usr1(int sig, siginfo_t *si, void *u)
 		exit(EXIT_FAILURE);
 	}
 	if (stk.ss_flags != SS_DISABLE)
-		printf("[FAIL]\tss_flags=%i, should be SS_DISABLE\n",
+		printf("[FAIL]\tss_flags=%x, should be SS_DISABLE\n",
 				stk.ss_flags);
 	else
 		printf("[OK]\tsigaltstack is disabled in sighandler\n");
@@ -122,7 +122,8 @@ int main(void)
 	if (stk.ss_flags == SS_DISABLE) {
 		printf("[OK]\tInitial sigaltstack state was SS_DISABLE\n");
 	} else {
-		printf("[FAIL]\tInitial sigaltstack state was %i; should have been SS_DISABLE\n", stk.ss_flags);
+		printf("[FAIL]\tInitial sigaltstack state was %x; "
+		       "should have been SS_DISABLE\n", stk.ss_flags);
 		return EXIT_FAILURE;
 	}
 
@@ -165,7 +166,7 @@ int main(void)
 		exit(EXIT_FAILURE);
 	}
 	if (stk.ss_flags != SS_AUTODISARM) {
-		printf("[FAIL]\tss_flags=%i, should be SS_AUTODISARM\n",
+		printf("[FAIL]\tss_flags=%x, should be SS_AUTODISARM\n",
 				stk.ss_flags);
 		exit(EXIT_FAILURE);
 	}
-- 
cgit v1.2.3


From df06a2d57711a1472ced72207373eeb6422d4721 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 1 Mar 2017 00:03:37 -0800
Subject: tools/testing/nvdimm: make iset cookie predictable

For testing changes to the iset cookie algorithm we need a value that is
constant from run-to-run.

Stop including dynamic data in the emulated region_offset values. Also,
pick values that sort in a different order depending on whether the
comparison is a memcmp() of two 8-byte arrays or subtraction of two
64-bit values.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 tools/testing/nvdimm/test/nfit.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 45be8b55a663..798f17655433 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -887,7 +887,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->range_index = 0+1;
 	memdev->region_index = 4+1;
 	memdev->region_size = SPA0_SIZE/2;
-	memdev->region_offset = t->spa_set_dma[0];
+	memdev->region_offset = 1;
 	memdev->address = 0;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 2;
@@ -902,7 +902,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->range_index = 0+1;
 	memdev->region_index = 5+1;
 	memdev->region_size = SPA0_SIZE/2;
-	memdev->region_offset = t->spa_set_dma[0] + SPA0_SIZE/2;
+	memdev->region_offset = (1 << 8);
 	memdev->address = 0;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 2;
@@ -917,7 +917,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->range_index = 1+1;
 	memdev->region_index = 4+1;
 	memdev->region_size = SPA1_SIZE/4;
-	memdev->region_offset = t->spa_set_dma[1];
+	memdev->region_offset = (1 << 16);
 	memdev->address = SPA0_SIZE/2;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 4;
@@ -932,7 +932,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->range_index = 1+1;
 	memdev->region_index = 5+1;
 	memdev->region_size = SPA1_SIZE/4;
-	memdev->region_offset = t->spa_set_dma[1] + SPA1_SIZE/4;
+	memdev->region_offset = (1 << 24);
 	memdev->address = SPA0_SIZE/2;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 4;
@@ -947,7 +947,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->range_index = 1+1;
 	memdev->region_index = 6+1;
 	memdev->region_size = SPA1_SIZE/4;
-	memdev->region_offset = t->spa_set_dma[1] + 2*SPA1_SIZE/4;
+	memdev->region_offset = (1ULL << 32);
 	memdev->address = SPA0_SIZE/2;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 4;
@@ -962,7 +962,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->range_index = 1+1;
 	memdev->region_index = 7+1;
 	memdev->region_size = SPA1_SIZE/4;
-	memdev->region_offset = t->spa_set_dma[1] + 3*SPA1_SIZE/4;
+	memdev->region_offset = (1ULL << 40);
 	memdev->address = SPA0_SIZE/2;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 4;
@@ -1380,7 +1380,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 		memdev->range_index = 11+1;
 		memdev->region_index = 9+1;
 		memdev->region_size = SPA0_SIZE;
-		memdev->region_offset = t->spa_set_dma[2];
+		memdev->region_offset = (1ULL << 48);
 		memdev->address = 0;
 		memdev->interleave_index = 0;
 		memdev->interleave_ways = 1;
-- 
cgit v1.2.3


From 2a4d0c627f5374f365a873dea4e10ae0bb437680 Mon Sep 17 00:00:00 2001
From: Dmitry Safonov <dsafonov@virtuozzo.com>
Date: Mon, 13 Feb 2017 13:13:36 +0300
Subject: x86/selftests: Add clobbers for int80 on x86_64

Kernel erases R8..R11 registers prior returning to userspace
from int80:

  https://lkml.org/lkml/2009/10/1/164

GCC can reuse these registers and doesn't expect them to change
during syscall invocation. I met this kind of bug in CRIU once
GCC 6.1 and CLANG stored local variables in those registers
and the kernel zerofied them during syscall:

  https://github.com/xemul/criu/commit/990d33f1a1cdd17bca6c2eb059ab3be2564f7fa2

By that reason I suggest to add those registers to clobbers
in selftests.  Also, as noted by Andy - removed unneeded clobber
for flags in INT $0x80 inline asm.

Signed-off-by: Dmitry Safonov <dsafonov@virtuozzo.com>
Acked-by: Andy Lutomirski <luto@kernel.org>
Cc: 0x7f454c46@gmail.com
Cc: Borislav Petkov <bp@alien8.de>
Cc: Borislav Petkov <bp@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kselftest@vger.kernel.org
Link: http://lkml.kernel.org/r/20170213101336.20486-1-dsafonov@virtuozzo.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/testing/selftests/x86/fsgsbase.c            |  2 +-
 tools/testing/selftests/x86/ldt_gdt.c             | 16 +++++++++++-----
 tools/testing/selftests/x86/ptrace_syscall.c      |  3 ++-
 tools/testing/selftests/x86/single_step_syscall.c |  5 ++++-
 4 files changed, 18 insertions(+), 8 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
index 5b2b4b3c634c..b4967d875236 100644
--- a/tools/testing/selftests/x86/fsgsbase.c
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -245,7 +245,7 @@ void do_unexpected_base(void)
 		long ret;
 		asm volatile ("int $0x80"
 			      : "=a" (ret) : "a" (243), "b" (low_desc)
-			      : "flags");
+			      : "r8", "r9", "r10", "r11");
 		memcpy(&desc, low_desc, sizeof(desc));
 		munmap(low_desc, sizeof(desc));
 
diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
index 4af47079cf04..f6121612e769 100644
--- a/tools/testing/selftests/x86/ldt_gdt.c
+++ b/tools/testing/selftests/x86/ldt_gdt.c
@@ -45,6 +45,12 @@
 #define AR_DB			(1 << 22)
 #define AR_G			(1 << 23)
 
+#ifdef __x86_64__
+# define INT80_CLOBBERS "r8", "r9", "r10", "r11"
+#else
+# define INT80_CLOBBERS
+#endif
+
 static int nerrs;
 
 /* Points to an array of 1024 ints, each holding its own index. */
@@ -588,7 +594,7 @@ static int invoke_set_thread_area(void)
 	asm volatile ("int $0x80"
 		      : "=a" (ret), "+m" (low_user_desc) :
 			"a" (243), "b" (low_user_desc)
-		      : "flags");
+		      : INT80_CLOBBERS);
 	return ret;
 }
 
@@ -657,7 +663,7 @@ static void test_gdt_invalidation(void)
 			"+a" (eax)
 		      : "m" (low_user_desc_clear),
 			[arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
-		      : "flags");
+		      : INT80_CLOBBERS);
 
 	if (sel != 0) {
 		result = "FAIL";
@@ -688,7 +694,7 @@ static void test_gdt_invalidation(void)
 			"+a" (eax)
 		      : "m" (low_user_desc_clear),
 			[arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
-		      : "flags");
+		      : INT80_CLOBBERS);
 
 	if (sel != 0) {
 		result = "FAIL";
@@ -721,7 +727,7 @@ static void test_gdt_invalidation(void)
 			"+a" (eax)
 		      : "m" (low_user_desc_clear),
 			[arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
-		      : "flags");
+		      : INT80_CLOBBERS);
 
 #ifdef __x86_64__
 	syscall(SYS_arch_prctl, ARCH_GET_FS, &new_base);
@@ -774,7 +780,7 @@ static void test_gdt_invalidation(void)
 			"+a" (eax)
 		      : "m" (low_user_desc_clear),
 			[arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
-		      : "flags");
+		      : INT80_CLOBBERS);
 
 #ifdef __x86_64__
 	syscall(SYS_arch_prctl, ARCH_GET_GS, &new_base);
diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c
index b037ce9cf116..eaea92439708 100644
--- a/tools/testing/selftests/x86/ptrace_syscall.c
+++ b/tools/testing/selftests/x86/ptrace_syscall.c
@@ -58,7 +58,8 @@ static void do_full_int80(struct syscall_args32 *args)
 	asm volatile ("int $0x80"
 		      : "+a" (args->nr),
 			"+b" (args->arg0), "+c" (args->arg1), "+d" (args->arg2),
-			"+S" (args->arg3), "+D" (args->arg4), "+r" (bp));
+			"+S" (args->arg3), "+D" (args->arg4), "+r" (bp)
+			: : "r8", "r9", "r10", "r11");
 	args->arg5 = bp;
 #else
 	sys32_helper(args, int80_and_ret);
diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c
index 50c26358e8b7..a48da95c18fd 100644
--- a/tools/testing/selftests/x86/single_step_syscall.c
+++ b/tools/testing/selftests/x86/single_step_syscall.c
@@ -56,9 +56,11 @@ static volatile sig_atomic_t sig_traps;
 #ifdef __x86_64__
 # define REG_IP REG_RIP
 # define WIDTH "q"
+# define INT80_CLOBBERS "r8", "r9", "r10", "r11"
 #else
 # define REG_IP REG_EIP
 # define WIDTH "l"
+# define INT80_CLOBBERS
 #endif
 
 static unsigned long get_eflags(void)
@@ -140,7 +142,8 @@ int main()
 
 	printf("[RUN]\tSet TF and check int80\n");
 	set_eflags(get_eflags() | X86_EFLAGS_TF);
-	asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid));
+	asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid)
+			: INT80_CLOBBERS);
 	check_result();
 
 	/*
-- 
cgit v1.2.3


From 0eb1d0fa6a68324b51db4f7ae16d9b042c5b2de4 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski <luto@kernel.org>
Date: Wed, 22 Feb 2017 07:36:17 -0800
Subject: selftests/x86: Add a basic selftest for ioperm
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This doesn't fully exercise the interaction between KVM and ioperm(),
but it does test basic functionality.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 tools/testing/selftests/x86/Makefile |   2 +-
 tools/testing/selftests/x86/ioperm.c | 170 +++++++++++++++++++++++++++++++++++
 2 files changed, 171 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/x86/ioperm.c

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 83d8b1c6cb0e..bed3e6086cb1 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -5,7 +5,7 @@ include ../lib.mk
 .PHONY: all all_32 all_64 warn_32bit_failure clean
 
 TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall test_mremap_vdso \
-			check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test \
+			check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test ioperm \
 			protection_keys test_vdso
 TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
 			test_FCMOV test_FCOMI test_FISTTP \
diff --git a/tools/testing/selftests/x86/ioperm.c b/tools/testing/selftests/x86/ioperm.c
new file mode 100644
index 000000000000..b77313ba2ab1
--- /dev/null
+++ b/tools/testing/selftests/x86/ioperm.c
@@ -0,0 +1,170 @@
+/*
+ * ioperm.c - Test case for ioperm(2)
+ * Copyright (c) 2015 Andrew Lutomirski
+ */
+
+#define _GNU_SOURCE
+#include <err.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdbool.h>
+#include <sched.h>
+#include <sys/io.h>
+
+static int nerrs = 0;
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+		       int flags)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = handler;
+	sa.sa_flags = SA_SIGINFO | flags;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+
+}
+
+static void clearhandler(int sig)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_handler = SIG_DFL;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
+static jmp_buf jmpbuf;
+
+static void sigsegv(int sig, siginfo_t *si, void *ctx_void)
+{
+	siglongjmp(jmpbuf, 1);
+}
+
+static bool try_outb(unsigned short port)
+{
+	sethandler(SIGSEGV, sigsegv, SA_RESETHAND);
+	if (sigsetjmp(jmpbuf, 1) != 0) {
+		return false;
+	} else {
+		asm volatile ("outb %%al, %w[port]"
+			      : : [port] "Nd" (port), "a" (0));
+		return true;
+	}
+	clearhandler(SIGSEGV);
+}
+
+static void expect_ok(unsigned short port)
+{
+	if (!try_outb(port)) {
+		printf("[FAIL]\toutb to 0x%02hx failed\n", port);
+		exit(1);
+	}
+
+	printf("[OK]\toutb to 0x%02hx worked\n", port);
+}
+
+static void expect_gp(unsigned short port)
+{
+	if (try_outb(port)) {
+		printf("[FAIL]\toutb to 0x%02hx worked\n", port);
+		exit(1);
+	}
+
+	printf("[OK]\toutb to 0x%02hx failed\n", port);
+}
+
+int main(void)
+{
+	cpu_set_t cpuset;
+	CPU_ZERO(&cpuset);
+	CPU_SET(0, &cpuset);
+	if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+		err(1, "sched_setaffinity to CPU 0");
+
+	expect_gp(0x80);
+	expect_gp(0xed);
+
+	/*
+	 * Probe for ioperm support.  Note that clearing ioperm bits
+	 * works even as nonroot.
+	 */
+	printf("[RUN]\tenable 0x80\n");
+	if (ioperm(0x80, 1, 1) != 0) {
+		printf("[OK]\tioperm(0x80, 1, 1) failed (%d) -- try running as root\n",
+		       errno);
+		return 0;
+	}
+	expect_ok(0x80);
+	expect_gp(0xed);
+
+	printf("[RUN]\tdisable 0x80\n");
+	if (ioperm(0x80, 1, 0) != 0) {
+		printf("[FAIL]\tioperm(0x80, 1, 0) failed (%d)", errno);
+		return 1;
+	}
+	expect_gp(0x80);
+	expect_gp(0xed);
+
+	/* Make sure that fork() preserves ioperm. */
+	if (ioperm(0x80, 1, 1) != 0) {
+		printf("[FAIL]\tioperm(0x80, 1, 0) failed (%d)", errno);
+		return 1;
+	}
+
+	pid_t child = fork();
+	if (child == -1)
+		err(1, "fork");
+
+	if (child == 0) {
+		printf("[RUN]\tchild: check that we inherited permissions\n");
+		expect_ok(0x80);
+		expect_gp(0xed);
+		return 0;
+	} else {
+		int status;
+		if (waitpid(child, &status, 0) != child ||
+		    !WIFEXITED(status)) {
+			printf("[FAIL]\tChild died\n");
+			nerrs++;
+		} else if (WEXITSTATUS(status) != 0) {
+			printf("[FAIL]\tChild failed\n");
+			nerrs++;
+		} else {
+			printf("[OK]\tChild succeeded\n");
+		}
+	}
+
+	/* Test the capability checks. */
+
+	printf("\tDrop privileges\n");
+	if (setresuid(1, 1, 1) != 0) {
+		printf("[WARN]\tDropping privileges failed\n");
+		return 0;
+	}
+
+	printf("[RUN]\tdisable 0x80\n");
+	if (ioperm(0x80, 1, 0) != 0) {
+		printf("[FAIL]\tioperm(0x80, 1, 0) failed (%d)", errno);
+		return 1;
+	}
+	printf("[OK]\tit worked\n");
+
+	printf("[RUN]\tenable 0x80 again\n");
+	if (ioperm(0x80, 1, 1) == 0) {
+		printf("[FAIL]\tit succeeded but should have failed.\n");
+		return 1;
+	}
+	printf("[OK]\tit failed\n");
+	return 0;
+}
-- 
cgit v1.2.3


From e53aff45c490ea04aa5d9e3cffa65b6b54397455 Mon Sep 17 00:00:00 2001
From: Shuah Khan <shuahkh@osg.samsung.com>
Date: Wed, 1 Mar 2017 15:15:07 -0700
Subject: selftests: lib.mk Fix individual test builds

In commit a8ba798bc8ec ("selftests: enable O and KBUILD_OUTPUT"), added
support to generate compile targets in a user specified directory. OUTPUT
variable controls the location which is undefined when tests are built in
the test directory or with "make -C tools/testing/selftests/x86".

make -C tools/testing/selftests/x86/
make: Entering directory '/lkml/linux_4.11/tools/testing/selftests/x86'
Makefile:44: warning: overriding recipe for target 'clean'
../lib.mk:51: warning: ignoring old recipe for target 'clean'
gcc -m64 -o /single_step_syscall_64 -O2 -g -std=gnu99 -pthread -Wall  single_step_syscall.c -lrt -ldl
/usr/bin/ld: cannot open output file /single_step_syscall_64: Permission denied
collect2: error: ld returned 1 exit status
Makefile:50: recipe for target '/single_step_syscall_64' failed
make: *** [/single_step_syscall_64] Error 1
make: Leaving directory '/lkml/linux_4.11/tools/testing/selftests/x86'

Same failure with "cd tools/testing/selftests/x86/;make" run.

Fix this with a change to lib.mk to define OUTPUT to be the pwd when
MAKELEVEL is 0. This covers both cases mentioned above.

Reported-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Shuah Khan <shuahkh@osg.samsung.com>
---
 tools/testing/selftests/lib.mk | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index ce96d80ad64f..775c589ac3c0 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -2,6 +2,10 @@
 # Makefile can operate with or without the kbuild infrastructure.
 CC := $(CROSS_COMPILE)gcc
 
+ifeq (0,$(MAKELEVEL))
+OUTPUT := $(shell pwd)
+endif
+
 TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS))
 TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES))
 
-- 
cgit v1.2.3


From a6d8a21596df041f36f4c2ccc260c459e3e851f1 Mon Sep 17 00:00:00 2001
From: Sachin Sant <sachinp@linux.vnet.ibm.com>
Date: Sun, 26 Feb 2017 11:38:39 +0530
Subject: selftest/powerpc: Fix false failures for skipped tests

Tests under alignment subdirectory are skipped when executed on previous
generation hardware, but harness still marks them as failed.

  test: test_copy_unaligned
  tags: git_version:unknown
  [SKIP] Test skipped on line 26
  skip: test_copy_unaligned
  selftests: copy_unaligned [FAIL]

The MAGIC_SKIP_RETURN_VALUE value assigned to rc variable is retained till
the program exit which causes the test to be marked as failed.

This patch resets the value before returning to the main() routine.
With this patch the test o/p is as follows:

  test: test_copy_unaligned
  tags: git_version:unknown
  [SKIP] Test skipped on line 26
  skip: test_copy_unaligned
  selftests: copy_unaligned [PASS]

Signed-off-by: Sachin Sant <sachinp@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 tools/testing/selftests/powerpc/harness.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c
index 248a820048df..66d31de60b9a 100644
--- a/tools/testing/selftests/powerpc/harness.c
+++ b/tools/testing/selftests/powerpc/harness.c
@@ -114,9 +114,11 @@ int test_harness(int (test_function)(void), char *name)
 
 	rc = run_test(test_function, name);
 
-	if (rc == MAGIC_SKIP_RETURN_VALUE)
+	if (rc == MAGIC_SKIP_RETURN_VALUE) {
 		test_skip(name);
-	else
+		/* so that skipped test is not marked as failed */
+		rc = 0;
+	} else
 		test_finish(name, rc);
 
 	return rc;
-- 
cgit v1.2.3


From 2eacc79c27eb683c4a3ded80c2629387ee0d4e04 Mon Sep 17 00:00:00 2001
From: Rehas Sachdeva <aquannie@gmail.com>
Date: Sat, 18 Feb 2017 07:31:00 -0500
Subject: radix tree test suite: Add test for idr_get_next()

Assert that idr_get_next() returns the next populated entry in the tree with
an ID greater than or equal to the value pointed to by @nextid argument.

Signed-off-by: Rehas Sachdeva <aquannie@gmail.com>
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 tools/testing/radix-tree/idr-test.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index a26098c6123d..f20690ac3a97 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -153,6 +153,30 @@ void idr_nowait_test(void)
 	idr_destroy(&idr);
 }
 
+void idr_get_next_test(void)
+{
+	unsigned long i;
+	int nextid;
+	DEFINE_IDR(idr);
+
+	int indices[] = {4, 7, 9, 15, 65, 128, 1000, 99999, 0};
+
+	for(i = 0; indices[i]; i++) {
+		struct item *item = item_create(indices[i], 0);
+		assert(idr_alloc(&idr, item, indices[i], indices[i+1],
+				 GFP_KERNEL) == indices[i]);
+	}
+
+	for(i = 0, nextid = 0; indices[i]; i++) {
+		idr_get_next(&idr, &nextid);
+		assert(nextid == indices[i]);
+		nextid++;
+	}
+
+	idr_for_each(&idr, item_idr_free, &idr);
+	idr_destroy(&idr);
+}
+
 void idr_checks(void)
 {
 	unsigned long i;
@@ -202,6 +226,7 @@ void idr_checks(void)
 	idr_alloc_test();
 	idr_null_test();
 	idr_nowait_test();
+	idr_get_next_test();
 }
 
 /*
-- 
cgit v1.2.3


From 166bb1f532fd9fe1b81c6b411ad5d5c9dd21a685 Mon Sep 17 00:00:00 2001
From: Rehas Sachdeva <aquannie@gmail.com>
Date: Mon, 20 Feb 2017 06:40:00 -0500
Subject: radix tree test suite: Add tests for ida_simple_get() and
 ida_simple_remove()

Assert that ida_simple_get() allocates an id in the passed range or returns
error on failure, and ida_simple_remove() releases an allocated id.

Signed-off-by: Rehas Sachdeva <aquannie@gmail.com>
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 tools/testing/radix-tree/idr-test.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index f20690ac3a97..86de901fa5c6 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -387,6 +387,24 @@ void ida_check_random(void)
 		goto repeat;
 }
 
+void ida_simple_get_remove_test(void)
+{
+	DEFINE_IDA(ida);
+	unsigned long i;
+
+	for (i = 0; i < 10000; i++) {
+		assert(ida_simple_get(&ida, 0, 20000, GFP_KERNEL) == i);
+	}
+	assert(ida_simple_get(&ida, 5, 30, GFP_KERNEL) < 0);
+
+	for (i = 0; i < 10000; i++) {
+		ida_simple_remove(&ida, i);
+	}
+	assert(ida_is_empty(&ida));
+
+	ida_destroy(&ida);
+}
+
 void ida_checks(void)
 {
 	DEFINE_IDA(ida);
@@ -453,6 +471,7 @@ void ida_checks(void)
 	ida_check_max();
 	ida_check_conv();
 	ida_check_random();
+	ida_simple_get_remove_test();
 
 	radix_tree_cpu_dead(1);
 }
-- 
cgit v1.2.3


From c629a344accd15022dd6d87fa28c8e22f978fbda Mon Sep 17 00:00:00 2001
From: Rehas Sachdeva <aquannie@gmail.com>
Date: Sun, 26 Feb 2017 06:33:00 -0500
Subject: radix tree test suite: Add test for radix_tree_clear_tags()

Assert that radix_tree_clear_tags() clears the tags on the passed node and
slot. Assert that the case where the radix tree has only one entry at index
zero and the node is NULL, is also handled.

Signed-off-by: Rehas Sachdeva <aquannie@gmail.com>
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 tools/testing/radix-tree/tag_check.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/radix-tree/tag_check.c b/tools/testing/radix-tree/tag_check.c
index d4ff00989245..36dcf7d6945d 100644
--- a/tools/testing/radix-tree/tag_check.c
+++ b/tools/testing/radix-tree/tag_check.c
@@ -330,6 +330,34 @@ static void single_check(void)
 	item_kill_tree(&tree);
 }
 
+void radix_tree_clear_tags_test(void)
+{
+	unsigned long index;
+	struct radix_tree_node *node;
+	struct radix_tree_iter iter;
+	void **slot;
+
+	RADIX_TREE(tree, GFP_KERNEL);
+
+	item_insert(&tree, 0);
+	item_tag_set(&tree, 0, 0);
+	__radix_tree_lookup(&tree, 0, &node, &slot);
+	radix_tree_clear_tags(&tree, node, slot);
+	assert(item_tag_get(&tree, 0, 0) == 0);
+
+	for (index = 0; index < 1000; index++) {
+		item_insert(&tree, index);
+		item_tag_set(&tree, index, 0);
+	}
+
+	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
+		radix_tree_clear_tags(&tree, iter.node, slot);
+		assert(item_tag_get(&tree, iter.index, 0) == 0);
+	}
+
+	item_kill_tree(&tree);
+}
+
 void tag_check(void)
 {
 	single_check();
@@ -347,4 +375,5 @@ void tag_check(void)
 	thrash_tags();
 	rcu_barrier();
 	printv(2, "after thrash_tags: %d allocated\n", nr_allocated);
+	radix_tree_clear_tags_test();
 }
-- 
cgit v1.2.3


From 0d4a41c1a0335dc515c6e7fabd447263b57f6457 Mon Sep 17 00:00:00 2001
From: Rehas Sachdeva <aquannie@gmail.com>
Date: Sun, 26 Feb 2017 16:17:00 -0500
Subject: radix tree test suite: Add performance benchmarks

Add performance benchmarks for radix tree insertion, tagging and deletion.

Signed-off-by: Rehas Sachdeva <aquannie@gmail.com>
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 tools/testing/radix-tree/benchmark.c | 82 +++++++++++++++++++++++++++++++++---
 1 file changed, 75 insertions(+), 7 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/radix-tree/benchmark.c b/tools/testing/radix-tree/benchmark.c
index 9b09ddfe462f..35741b9c2a4a 100644
--- a/tools/testing/radix-tree/benchmark.c
+++ b/tools/testing/radix-tree/benchmark.c
@@ -17,6 +17,9 @@
 #include <time.h>
 #include "test.h"
 
+#define for_each_index(i, base, order) \
+	        for (i = base; i < base + (1 << order); i++)
+
 #define NSEC_PER_SEC	1000000000L
 
 static long long benchmark_iter(struct radix_tree_root *root, bool tagged)
@@ -57,22 +60,87 @@ again:
 	return nsec;
 }
 
+static void benchmark_insert(struct radix_tree_root *root,
+			     unsigned long size, unsigned long step, int order)
+{
+	struct timespec start, finish;
+	unsigned long index;
+	long long nsec;
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+
+	for (index = 0 ; index < size ; index += step)
+		item_insert_order(root, index, order);
+
+	clock_gettime(CLOCK_MONOTONIC, &finish);
+
+	nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
+	       (finish.tv_nsec - start.tv_nsec);
+
+	printv(2, "Size: %8ld, step: %8ld, order: %d, insertion: %15lld ns\n",
+		size, step, order, nsec);
+}
+
+static void benchmark_tagging(struct radix_tree_root *root,
+			     unsigned long size, unsigned long step, int order)
+{
+	struct timespec start, finish;
+	unsigned long index;
+	long long nsec;
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+
+	for (index = 0 ; index < size ; index += step)
+		radix_tree_tag_set(root, index, 0);
+
+	clock_gettime(CLOCK_MONOTONIC, &finish);
+
+	nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
+	       (finish.tv_nsec - start.tv_nsec);
+
+	printv(2, "Size: %8ld, step: %8ld, order: %d, tagging: %17lld ns\n",
+		size, step, order, nsec);
+}
+
+static void benchmark_delete(struct radix_tree_root *root,
+			     unsigned long size, unsigned long step, int order)
+{
+	struct timespec start, finish;
+	unsigned long index, i;
+	long long nsec;
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+
+	for (index = 0 ; index < size ; index += step)
+		for_each_index(i, index, order)
+			item_delete(root, i);
+
+	clock_gettime(CLOCK_MONOTONIC, &finish);
+
+	nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
+	       (finish.tv_nsec - start.tv_nsec);
+
+	printv(2, "Size: %8ld, step: %8ld, order: %d, deletion: %16lld ns\n",
+		size, step, order, nsec);
+}
+
 static void benchmark_size(unsigned long size, unsigned long step, int order)
 {
 	RADIX_TREE(tree, GFP_KERNEL);
 	long long normal, tagged;
-	unsigned long index;
 
-	for (index = 0 ; index < size ; index += step) {
-		item_insert_order(&tree, index, order);
-		radix_tree_tag_set(&tree, index, 0);
-	}
+	benchmark_insert(&tree, size, step, order);
+	benchmark_tagging(&tree, size, step, order);
 
 	tagged = benchmark_iter(&tree, true);
 	normal = benchmark_iter(&tree, false);
 
-	printv(2, "Size %ld, step %6ld, order %d tagged %10lld ns, normal %10lld ns\n",
-		size, step, order, tagged, normal);
+	printv(2, "Size: %8ld, step: %8ld, order: %d, tagged iteration: %8lld ns\n",
+		size, step, order, tagged);
+	printv(2, "Size: %8ld, step: %8ld, order: %d, normal iteration: %8lld ns\n",
+		size, step, order, normal);
+
+	benchmark_delete(&tree, size, step, order);
 
 	item_kill_tree(&tree);
 	rcu_barrier();
-- 
cgit v1.2.3


From 6478581c85cd3091a6188a2433a8093f335f8f2a Mon Sep 17 00:00:00 2001
From: Rehas Sachdeva <aquannie@gmail.com>
Date: Mon, 27 Feb 2017 07:53:00 -0500
Subject: radix tree test suite: Add performance test for radix_tree_split()

Signed-off-by: Rehas Sachdeva <aquannie@gmail.com>
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 tools/testing/radix-tree/benchmark.c | 44 ++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/radix-tree/benchmark.c b/tools/testing/radix-tree/benchmark.c
index 35741b9c2a4a..b03c3f30c740 100644
--- a/tools/testing/radix-tree/benchmark.c
+++ b/tools/testing/radix-tree/benchmark.c
@@ -146,6 +146,46 @@ static void benchmark_size(unsigned long size, unsigned long step, int order)
 	rcu_barrier();
 }
 
+static long long  __benchmark_split(unsigned long index,
+				    int old_order, int new_order)
+{
+	struct timespec start, finish;
+	long long nsec;
+	RADIX_TREE(tree, GFP_ATOMIC);
+
+	item_insert_order(&tree, index, old_order);
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	radix_tree_split(&tree, index, new_order);
+	clock_gettime(CLOCK_MONOTONIC, &finish);
+	nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
+	       (finish.tv_nsec - start.tv_nsec);
+
+	item_kill_tree(&tree);
+
+	return nsec;
+
+}
+
+static void benchmark_split(unsigned long size, unsigned long step)
+{
+	int i, j, idx;
+	long long nsec = 0;
+
+
+	for (idx = 0; idx < size; idx += step) {
+		for (i = 3; i < 11; i++) {
+			for (j = 0; j < i; j++) {
+				nsec += __benchmark_split(idx, i, j);
+			}
+		}
+	}
+
+	printv(2, "Size %8ld, step %8ld, split time %10lld ns\n",
+			size, step, nsec);
+
+}
+
 void benchmark(void)
 {
 	unsigned long size[] = {1 << 10, 1 << 20, 0};
@@ -163,4 +203,8 @@ void benchmark(void)
 	for (c = 0; size[c]; c++)
 		for (s = 0; step[s]; s++)
 			benchmark_size(size[c], step[s] << 9, 9);
+
+	for (c = 0; size[c]; c++)
+		for (s = 0; step[s]; s++)
+			benchmark_split(size[c], step[s]);
 }
-- 
cgit v1.2.3


From 54f4d3341c8fe31e20915e2c1fb322ff8a069832 Mon Sep 17 00:00:00 2001
From: Rehas Sachdeva <aquannie@gmail.com>
Date: Mon, 27 Feb 2017 08:11:00 -0500
Subject: radix tree test suite: Add performance test for radix_tree_join()

Signed-off-by: Rehas Sachdeva <aquannie@gmail.com>
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 tools/testing/radix-tree/benchmark.c | 47 ++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/radix-tree/benchmark.c b/tools/testing/radix-tree/benchmark.c
index b03c3f30c740..99c40f3ed133 100644
--- a/tools/testing/radix-tree/benchmark.c
+++ b/tools/testing/radix-tree/benchmark.c
@@ -186,6 +186,50 @@ static void benchmark_split(unsigned long size, unsigned long step)
 
 }
 
+static long long  __benchmark_join(unsigned long index,
+			     unsigned order1, unsigned order2)
+{
+	unsigned long loc;
+	struct timespec start, finish;
+	long long nsec;
+	void *item, *item2 = item_create(index + 1, order1);
+	RADIX_TREE(tree, GFP_KERNEL);
+
+	item_insert_order(&tree, index, order2);
+	item = radix_tree_lookup(&tree, index);
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	radix_tree_join(&tree, index + 1, order1, item2);
+	clock_gettime(CLOCK_MONOTONIC, &finish);
+	nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
+		(finish.tv_nsec - start.tv_nsec);
+
+	loc = find_item(&tree, item);
+	if (loc == -1)
+		free(item);
+
+	item_kill_tree(&tree);
+
+	return nsec;
+}
+
+static void benchmark_join(unsigned long step)
+{
+	int i, j, idx;
+	long long nsec = 0;
+
+	for (idx = 0; idx < 1 << 10; idx += step) {
+		for (i = 1; i < 15; i++) {
+			for (j = 0; j < i; j++) {
+				nsec += __benchmark_join(idx, i, j);
+			}
+		}
+	}
+
+	printv(2, "Size %8d, step %8ld, join time %10lld ns\n",
+			1 << 10, step, nsec);
+}
+
 void benchmark(void)
 {
 	unsigned long size[] = {1 << 10, 1 << 20, 0};
@@ -207,4 +251,7 @@ void benchmark(void)
 	for (c = 0; size[c]; c++)
 		for (s = 0; step[s]; s++)
 			benchmark_split(size[c], step[s]);
+
+	for (s = 0; step[s]; s++)
+		benchmark_join(step[s]);
 }
-- 
cgit v1.2.3


From c4634b08d9eb9c13be2296230bf33c79390dbf9c Mon Sep 17 00:00:00 2001
From: Rehas Sachdeva <aquannie@gmail.com>
Date: Mon, 27 Feb 2017 08:49:00 -0500
Subject: radix tree test suite: Build 32 bit binaries

Add option 'make BUILD=32' for building 32-bit binaries.

Signed-off-by: Rehas Sachdeva <aquannie@gmail.com>
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 tools/testing/radix-tree/Makefile | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index f11315bedefc..b59c2a9ef778 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -10,6 +10,10 @@ ifndef SHIFT
 	SHIFT=3
 endif
 
+ifeq ($(BUILD), 32)
+	CFLAGS += -m32
+endif
+
 targets: mapshift $(TARGETS)
 
 main:	$(OFILES)
-- 
cgit v1.2.3


From 284d96a494d705b9c5330c900e976fceda885b9f Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Thu, 2 Mar 2017 04:29:00 -0500
Subject: radix tree test suite: Fix build with --as-needed

Currently the radix tree test suite doesn't build with toolchains that
use --as-needed by default, for example Ubuntu's:

  cc -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE -fsanitize=address -lpthread -lurcu main.o ... -o main
  /usr/bin/ld: regression1.o: undefined reference to symbol 'pthread_join@@GLIBC_2.17'
  /lib/powerpc64le-linux-gnu/libpthread.so.0: error adding symbols: DSO missing from command line
  collect2: error: ld returned 1 exit status

This is caused by the custom makefile rules placing LDFLAGS before the
.o files that need the libraries.

We could fix it by using --no-as-needed, or rewriting the custom rules.
But we can also just drop the custom rules and move the libraries to
LDLIBS, and then the default rules work correctly - with the one caveat
that we need to add -fsanitize=address to LDFLAGS because that must be
passed to the linker as well as the compiler.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 tools/testing/radix-tree/Makefile | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index b59c2a9ef778..022488f50fc6 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -1,6 +1,7 @@
 
 CFLAGS += -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE -fsanitize=address
-LDFLAGS += -lpthread -lurcu
+LDFLAGS += -fsanitize=address
+LDLIBS+= -lpthread -lurcu
 TARGETS = main idr-test multiorder
 CORE_OFILES := radix-tree.o idr.o linux.o test.o find_bit.o
 OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \
@@ -17,13 +18,10 @@ endif
 targets: mapshift $(TARGETS)
 
 main:	$(OFILES)
-	$(CC) $(CFLAGS) $(LDFLAGS) $^ -o main
 
 idr-test: idr-test.o $(CORE_OFILES)
-	$(CC) $(CFLAGS) $(LDFLAGS) $^ -o idr-test
 
 multiorder: multiorder.o $(CORE_OFILES)
-	$(CC) $(CFLAGS) $(LDFLAGS) $^ -o multiorder
 
 clean:
 	$(RM) $(TARGETS) *.o radix-tree.c idr.c generated/map-shift.h
-- 
cgit v1.2.3


From 3f1b6f9d49ba5a209d745fa2448657d8b66ed0c0 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Thu, 2 Mar 2017 12:24:28 -0500
Subject: radix tree test suite: Depend on Makefile and quieten grep

Changing the CFLAGS in the Makefile didn't always lead to a
recompilation because the OFILES didn't depend on the Makefile.
Also, after doing make clean, grep would still complain about
a missing map-shift.h; we need -s as well as -q.

Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 tools/testing/radix-tree/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index 022488f50fc6..4c6289c5d415 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -28,7 +28,7 @@ clean:
 
 vpath %.c ../../lib
 
-$(OFILES): *.h */*.h generated/map-shift.h \
+$(OFILES): Makefile *.h */*.h generated/map-shift.h \
 	../../include/linux/*.h \
 	../../include/asm/*.h \
 	../../../include/linux/radix-tree.h \
@@ -43,7 +43,7 @@ idr.c: ../../../lib/idr.c
 .PHONY: mapshift
 
 mapshift:
-	@if ! grep -qw $(SHIFT) generated/map-shift.h; then		\
+	@if ! grep -qws $(SHIFT) generated/map-shift.h; then		\
 		echo "#define RADIX_TREE_MAP_SHIFT $(SHIFT)" >		\
 				generated/map-shift.h;			\
 	fi
-- 
cgit v1.2.3


From 4ecd9542dbc3e07f3bd3870aac12839f72b47db4 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Fri, 3 Mar 2017 12:16:10 -0500
Subject: ida: Free correct IDA bitmap

There's a relatively rare race where we look at the per-cpu preallocated
IDA bitmap, see it's NULL, allocate a new one, and atomically update it.
If the kmalloc() happened to sleep and we were rescheduled to a different
CPU, or an interrupt came in at the exact right time, another task
might have successfully allocated a bitmap and already deposited it.
I forgot what the semantics of cmpxchg() were and ended up freeing the
wrong bitmap leading to KASAN reporting a use-after-free.

Dmitry found the bug with syzkaller & wrote the patch.  I wrote the test
case that will reproduce the bug without his patch being applied.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 lib/radix-tree.c                    |  4 ++--
 tools/testing/radix-tree/idr-test.c | 34 +++++++++++++++++++++++++++++++---
 tools/testing/radix-tree/main.c     |  1 +
 tools/testing/radix-tree/test.h     |  1 +
 4 files changed, 35 insertions(+), 5 deletions(-)

(limited to 'tools/testing')

diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 5ed506d648c4..691a9ad48497 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -2129,8 +2129,8 @@ int ida_pre_get(struct ida *ida, gfp_t gfp)
 		struct ida_bitmap *bitmap = kmalloc(sizeof(*bitmap), gfp);
 		if (!bitmap)
 			return 0;
-		bitmap = this_cpu_cmpxchg(ida_bitmap, NULL, bitmap);
-		kfree(bitmap);
+		if (this_cpu_cmpxchg(ida_bitmap, NULL, bitmap))
+			kfree(bitmap);
 	}
 
 	return 1;
diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index 86de901fa5c6..30cd0b296f1a 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -363,7 +363,7 @@ void ida_check_random(void)
 {
 	DEFINE_IDA(ida);
 	DECLARE_BITMAP(bitmap, 2048);
-	int id;
+	int id, err;
 	unsigned int i;
 	time_t s = time(NULL);
 
@@ -377,8 +377,11 @@ void ida_check_random(void)
 			ida_remove(&ida, bit);
 		} else {
 			__set_bit(bit, bitmap);
-			ida_pre_get(&ida, GFP_KERNEL);
-			assert(!ida_get_new_above(&ida, bit, &id));
+			do {
+				ida_pre_get(&ida, GFP_KERNEL);
+				err = ida_get_new_above(&ida, bit, &id);
+			} while (err == -ENOMEM);
+			assert(!err);
 			assert(id == bit);
 		}
 	}
@@ -476,11 +479,36 @@ void ida_checks(void)
 	radix_tree_cpu_dead(1);
 }
 
+static void *ida_random_fn(void *arg)
+{
+	rcu_register_thread();
+	ida_check_random();
+	rcu_unregister_thread();
+	return NULL;
+}
+
+void ida_thread_tests(void)
+{
+	pthread_t threads[10];
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(threads); i++)
+		if (pthread_create(&threads[i], NULL, ida_random_fn, NULL)) {
+			perror("creating ida thread");
+			exit(1);
+		}
+
+	while (i--)
+		pthread_join(threads[i], NULL);
+}
+
 int __weak main(void)
 {
 	radix_tree_init();
 	idr_checks();
 	ida_checks();
+	ida_thread_tests();
+	radix_tree_cpu_dead(1);
 	rcu_barrier();
 	if (nr_allocated)
 		printf("nr_allocated = %d\n", nr_allocated);
diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c
index b829127d5670..bc9a78449572 100644
--- a/tools/testing/radix-tree/main.c
+++ b/tools/testing/radix-tree/main.c
@@ -368,6 +368,7 @@ int main(int argc, char **argv)
 	iteration_test(0, 10 + 90 * long_run);
 	iteration_test(7, 10 + 90 * long_run);
 	single_thread_tests(long_run);
+	ida_thread_tests();
 
 	/* Free any remaining preallocated nodes */
 	radix_tree_cpu_dead(0);
diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h
index b30e11d9d271..0f8220cc6166 100644
--- a/tools/testing/radix-tree/test.h
+++ b/tools/testing/radix-tree/test.h
@@ -36,6 +36,7 @@ void iteration_test(unsigned order, unsigned duration);
 void benchmark(void);
 void idr_checks(void);
 void ida_checks(void);
+void ida_thread_tests(void);
 
 struct item *
 item_tag_set(struct radix_tree_root *root, unsigned long index, int tag);
-- 
cgit v1.2.3


From f0f3f2d0a3e0f7c48163fd8b45f5909d46e7f371 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <mawilcox@microsoft.com>
Date: Fri, 3 Mar 2017 12:28:37 -0500
Subject: radix tree test suite: Specify -m32 in LDFLAGS too

Michael's patch to use the default make rule for linking and the patch
from Rehas to use -m32 if building a 32-bit test-suite on a 64-bit
platform don't work well together.

Reported-by: Rehas Sachdeva <aquannie@gmail.com>
Signed-off-by: Matthew Wilcox <mawilcox@microsoft.com>
---
 tools/testing/radix-tree/Makefile | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools/testing')

diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index 4c6289c5d415..6a9480c03cbd 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -13,6 +13,7 @@ endif
 
 ifeq ($(BUILD), 32)
 	CFLAGS += -m32
+	LDFLAGS += -m32
 endif
 
 targets: mapshift $(TARGETS)
-- 
cgit v1.2.3


From 99c014a87979c9244806685f3c3fc7767f79f645 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Wed, 8 Mar 2017 10:16:17 -0500
Subject: ktest: Fix while loop in wait_for_input

The run_command function was changed to use the wait_for_input function to
allow having a timeout if the command to run takes too much time. There was
a bug in the wait_for_input where it could end up going into an infinite
loop. There's two issues here. One is that the return value of the sysread
wasn't used for the write (to write a proper size), and that it should
continue processing the passed in file descriptor too even if there was
input. There was no check for error, if for some reason STDIN returned an
error, the function would go into an infinite loop and never exit.

Reported-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tested-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fixes: 6e98d1b4415f ("ktest: Add timeout to ssh command")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tools/testing/ktest/ktest.pl | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index 29470b554711..0c006a2f165d 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -1904,11 +1904,12 @@ sub wait_for_input
 
 	# copy data from stdin to the console
 	if (vec($rout, fileno(\*STDIN), 1) == 1) {
-	    sysread(\*STDIN, $buf, 1000);
-	    syswrite($fp, $buf, 1000);
-	    next;
+	    $nr = sysread(\*STDIN, $buf, 1000);
+	    syswrite($fp, $buf, $nr) if ($nr > 0);
 	}
 
+	next if (vec($rout, fileno($fp), 1) != 1);
+
 	$line = "";
 
 	# try to read one char at a time
-- 
cgit v1.2.3


From f7c6401ff84ab8ffffc281a29aa0a787f7eb346e Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Wed, 8 Mar 2017 10:36:59 -0500
Subject: ktest: Make sure wait_for_input does honor the timeout

The function wait_for_input takes in a timeout, and even has a default
timeout. But if for some reason the STDIN descriptor keeps sending in data,
the function will never time out. The timout is to wait for the data from
the passed in file descriptor, not for STDIN. Adding a test in the case
where there's no data from the passed in file descriptor that checks to see
if the timeout passed, will ensure that it will timeout properly even if
there's input in STDIN.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 tools/testing/ktest/ktest.pl | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index 0c006a2f165d..49f7c8b9d9c4 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -1880,6 +1880,7 @@ sub get_grub_index {
 sub wait_for_input
 {
     my ($fp, $time) = @_;
+    my $start_time;
     my $rin;
     my $rout;
     my $nr;
@@ -1895,12 +1896,12 @@ sub wait_for_input
     vec($rin, fileno($fp), 1) = 1;
     vec($rin, fileno(\*STDIN), 1) = 1;
 
+    $start_time = time;
+
     while (1) {
 	$nr = select($rout=$rin, undef, undef, $time);
 
-	if ($nr <= 0) {
-	    return undef;
-	}
+	last if ($nr <= 0);
 
 	# copy data from stdin to the console
 	if (vec($rout, fileno(\*STDIN), 1) == 1) {
@@ -1908,7 +1909,11 @@ sub wait_for_input
 	    syswrite($fp, $buf, $nr) if ($nr > 0);
 	}
 
-	next if (vec($rout, fileno($fp), 1) != 1);
+	# The timeout is based on time waiting for the fp data
+	if (vec($rout, fileno($fp), 1) != 1) {
+	    last if (defined($time) && (time - $start_time > $time));
+	    next;
+	}
 
 	$line = "";
 
@@ -1918,12 +1923,11 @@ sub wait_for_input
 	    last if ($ch eq "\n");
 	}
 
-	if (!length($line)) {
-	    return undef;
-	}
+	last if (!length($line));
 
 	return $line;
     }
+    return undef;
 }
 
 sub reboot_to {
-- 
cgit v1.2.3


From 605df8d674ac65e044a0bf4998b28c2f350b7f9e Mon Sep 17 00:00:00 2001
From: Cyril Bur <cyrilbur@gmail.com>
Date: Tue, 7 Mar 2017 11:39:31 +1100
Subject: selftests/powerpc: Replace stxvx and lxvx with stxvd2x/lxvd2x

On POWER8 (ISA 2.07) lxvx and stxvx are defined to be extended mnemonics
of lxvd2x and stxvd2x. For POWER9 (ISA 3.0) the HW architects in their
infinite wisdom made lxvx and stxvx instructions in their own right.

POWER9 aware GCC will use the POWER9 instruction for lxvx and stxvx
causing these selftests to fail on POWER8. Further compounding the
issue, because of the way -mvsx works it will cause the power9
instructions to be used regardless of -mcpu=power8 to GCC or -mpower8 to
AS.

The safest way to address the problem for now is to not use the extended
mnemonic. We don't care how the CPU loads the values from memory since
the tests only performs register comparisons, so using stdvd2x/lxvd2x
does not impact the test.

Signed-off-by: Cyril Bur <cyrilbur@gmail.com>
Acked-by: Balbir Singh<bsingharora@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 tools/testing/selftests/powerpc/include/vsx_asm.h | 48 +++++++++++------------
 1 file changed, 24 insertions(+), 24 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/powerpc/include/vsx_asm.h b/tools/testing/selftests/powerpc/include/vsx_asm.h
index d828bfb6ef2d..54064ced9e95 100644
--- a/tools/testing/selftests/powerpc/include/vsx_asm.h
+++ b/tools/testing/selftests/powerpc/include/vsx_asm.h
@@ -16,56 +16,56 @@
  */
 FUNC_START(load_vsx)
 	li	r5,0
-	lxvx	vs20,r5,r3
+	lxvd2x	vs20,r5,r3
 	addi	r5,r5,16
-	lxvx	vs21,r5,r3
+	lxvd2x	vs21,r5,r3
 	addi	r5,r5,16
-	lxvx	vs22,r5,r3
+	lxvd2x	vs22,r5,r3
 	addi	r5,r5,16
-	lxvx	vs23,r5,r3
+	lxvd2x	vs23,r5,r3
 	addi	r5,r5,16
-	lxvx	vs24,r5,r3
+	lxvd2x	vs24,r5,r3
 	addi	r5,r5,16
-	lxvx	vs25,r5,r3
+	lxvd2x	vs25,r5,r3
 	addi	r5,r5,16
-	lxvx	vs26,r5,r3
+	lxvd2x	vs26,r5,r3
 	addi	r5,r5,16
-	lxvx	vs27,r5,r3
+	lxvd2x	vs27,r5,r3
 	addi	r5,r5,16
-	lxvx	vs28,r5,r3
+	lxvd2x	vs28,r5,r3
 	addi	r5,r5,16
-	lxvx	vs29,r5,r3
+	lxvd2x	vs29,r5,r3
 	addi	r5,r5,16
-	lxvx	vs30,r5,r3
+	lxvd2x	vs30,r5,r3
 	addi	r5,r5,16
-	lxvx	vs31,r5,r3
+	lxvd2x	vs31,r5,r3
 	blr
 FUNC_END(load_vsx)
 
 FUNC_START(store_vsx)
 	li	r5,0
-	stxvx	vs20,r5,r3
+	stxvd2x	vs20,r5,r3
 	addi	r5,r5,16
-	stxvx	vs21,r5,r3
+	stxvd2x	vs21,r5,r3
 	addi	r5,r5,16
-	stxvx	vs22,r5,r3
+	stxvd2x	vs22,r5,r3
 	addi	r5,r5,16
-	stxvx	vs23,r5,r3
+	stxvd2x	vs23,r5,r3
 	addi	r5,r5,16
-	stxvx	vs24,r5,r3
+	stxvd2x	vs24,r5,r3
 	addi	r5,r5,16
-	stxvx	vs25,r5,r3
+	stxvd2x	vs25,r5,r3
 	addi	r5,r5,16
-	stxvx	vs26,r5,r3
+	stxvd2x	vs26,r5,r3
 	addi	r5,r5,16
-	stxvx	vs27,r5,r3
+	stxvd2x	vs27,r5,r3
 	addi	r5,r5,16
-	stxvx	vs28,r5,r3
+	stxvd2x	vs28,r5,r3
 	addi	r5,r5,16
-	stxvx	vs29,r5,r3
+	stxvd2x	vs29,r5,r3
 	addi	r5,r5,16
-	stxvx	vs30,r5,r3
+	stxvd2x	vs30,r5,r3
 	addi	r5,r5,16
-	stxvx	vs31,r5,r3
+	stxvd2x	vs31,r5,r3
 	blr
 FUNC_END(store_vsx)
-- 
cgit v1.2.3


From 46aa6a302b53f543f8e8b8e1714dc5e449ad36a6 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Thu, 9 Mar 2017 16:17:14 -0800
Subject: userfaultfd: selftest: vm: allow to build in vm/ directory

linux/tools/testing/selftests/vm $ make

  gcc -Wall -I ../../../../usr/include     compaction_test.c -lrt -o /compaction_test
  /usr/lib/gcc/x86_64-pc-linux-gnu/4.9.4/../../../../x86_64-pc-linux-gnu/bin/ld: cannot open output file /compaction_test: Permission denied
  collect2: error: ld returned 1 exit status
  make: *** [../lib.mk:54: /compaction_test] Error 1

Since commit a8ba798bc8ec ("selftests: enable O and KBUILD_OUTPUT")
selftests/vm build fails if run from the "selftests/vm" directory, but
it works in the selftests/ directory.  It's quicker to be able to do a
local vm-only build after a tree wipe and this patch allows for it
again.

Link: http://lkml.kernel.org/r/20170302173738.18994-4-aarcange@redhat.com
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/testing/selftests/vm/Makefile | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 4cff7e7ddcc4..41642ba5e318 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -1,5 +1,9 @@
 # Makefile for vm selftests
 
+ifndef OUTPUT
+  OUTPUT := $(shell pwd)
+endif
+
 CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS)
 LDLIBS = -lrt
 TEST_GEN_FILES = compaction_test
-- 
cgit v1.2.3


From 1da8ac7c49fb2879ba95006d8bd1095e6870ea1a Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Fri, 10 Mar 2017 22:05:55 -0800
Subject: selftests/bpf: fix broken build

Recent merge of 'linux-kselftest-4.11-rc1' tree broke bpf test build.
None of the tests were building and test_verifier.c had tons of compiler errors.
Fix it and add #ifdef CAP_IS_SUPPORTED to support old versions of libcap.
Tested on centos 6.8 and 7

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/include/uapi/linux/bpf_perf_event.h   | 18 ++++++++++++++++++
 tools/testing/selftests/bpf/Makefile        |  4 +++-
 tools/testing/selftests/bpf/test_verifier.c |  4 ++++
 3 files changed, 25 insertions(+), 1 deletion(-)
 create mode 100644 tools/include/uapi/linux/bpf_perf_event.h

(limited to 'tools/testing')

diff --git a/tools/include/uapi/linux/bpf_perf_event.h b/tools/include/uapi/linux/bpf_perf_event.h
new file mode 100644
index 000000000000..067427259820
--- /dev/null
+++ b/tools/include/uapi/linux/bpf_perf_event.h
@@ -0,0 +1,18 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _UAPI__LINUX_BPF_PERF_EVENT_H__
+#define _UAPI__LINUX_BPF_PERF_EVENT_H__
+
+#include <linux/types.h>
+#include <linux/ptrace.h>
+
+struct bpf_perf_event_data {
+	struct pt_regs regs;
+	__u64 sample_period;
+};
+
+#endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 4b498265dae6..67531f47781b 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -1,12 +1,14 @@
 LIBDIR := ../../../lib
 BPFOBJ := $(LIBDIR)/bpf/bpf.o
 
-CFLAGS += -Wall -O2 -lcap -I../../../include/uapi -I$(LIBDIR)
+CFLAGS += -Wall -O2 -lcap -I../../../include/uapi -I$(LIBDIR) $(BPFOBJ)
 
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map
 
 TEST_PROGS := test_kmod.sh
 
+all: $(TEST_GEN_PROGS)
+
 .PHONY: all clean force
 
 # force a rebuild of BPFOBJ when its dependencies are updated
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index e1f5b9eea1e8..d1555e4240c0 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -8,6 +8,8 @@
  * License as published by the Free Software Foundation.
  */
 
+#include <asm/types.h>
+#include <linux/types.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -4583,10 +4585,12 @@ static bool is_admin(void)
 	cap_flag_value_t sysadmin = CAP_CLEAR;
 	const cap_value_t cap_val = CAP_SYS_ADMIN;
 
+#ifdef CAP_IS_SUPPORTED
 	if (!CAP_IS_SUPPORTED(CAP_SETFCAP)) {
 		perror("cap_get_flag");
 		return false;
 	}
+#endif
 	caps = cap_get_proc();
 	if (!caps) {
 		perror("cap_get_proc");
-- 
cgit v1.2.3


From e8f1f34a344d060eaf1918089369c4c1172a153b Mon Sep 17 00:00:00 2001
From: Zi Shen Lim <zlim.lnx@gmail.com>
Date: Sun, 19 Mar 2017 23:03:14 -0700
Subject: selftests/bpf: fix broken build, take 2

Merge of 'linux-kselftest-4.11-rc1':

1. Partially removed use of 'test_objs' target, breaking force rebuild of
BPFOBJ, introduced in commit d498f8719a09 ("bpf: Rebuild bpf.o for any
dependency update").

  Update target so dependency on BPFOBJ is restored.

2. Introduced commit 2047f1d8ba28 ("selftests: Fix the .c linking rule")
which fixes order of LDLIBS.

  Commit d02d8986a768 ("bpf: Always test unprivileged programs") added
libcap dependency into CFLAGS. Use LDLIBS instead to fix linking of
test_verifier.

3. Introduced commit d83c3ba0b926 ("selftests: Fix selftests build to
just build, not run tests").

  Reordering the Makefile allows us to remove the 'all' target.

Tested both:
    selftests/bpf$ make
and
    selftests$ make TARGETS=bpf
on Ubuntu 16.04.2.

Signed-off-by: Zi Shen Lim <zlim.lnx@gmail.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Tested-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Tested-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Shuah Khan <shuahkh@osg.samsung.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/bpf/Makefile | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

(limited to 'tools/testing')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 67531f47781b..6a1ad58cb66f 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -1,22 +1,23 @@
 LIBDIR := ../../../lib
-BPFOBJ := $(LIBDIR)/bpf/bpf.o
+BPFDIR := $(LIBDIR)/bpf
 
-CFLAGS += -Wall -O2 -lcap -I../../../include/uapi -I$(LIBDIR) $(BPFOBJ)
+CFLAGS += -Wall -O2 -I../../../include/uapi -I$(LIBDIR)
+LDLIBS += -lcap
 
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map
 
 TEST_PROGS := test_kmod.sh
 
-all: $(TEST_GEN_PROGS)
+include ../lib.mk
+
+BPFOBJ := $(OUTPUT)/bpf.o
+
+$(TEST_GEN_PROGS): $(BPFOBJ)
 
-.PHONY: all clean force
+.PHONY: force
 
 # force a rebuild of BPFOBJ when its dependencies are updated
 force:
 
 $(BPFOBJ): force
-	$(MAKE) -C $(dir $(BPFOBJ))
-
-$(test_objs): $(BPFOBJ)
-
-include ../lib.mk
+	$(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/
-- 
cgit v1.2.3


From 8c290e60fa2a51806159522331c9ed41252a8fb3 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@fb.com>
Date: Tue, 21 Mar 2017 19:05:04 -0700
Subject: bpf: fix hashmap extra_elems logic

In both kmalloc and prealloc mode the bpf_map_update_elem() is using
per-cpu extra_elems to do atomic update when the map is full.
There are two issues with it. The logic can be misused, since it allows
max_entries+num_cpus elements to be present in the map. And alloc_extra_elems()
at map creation time can fail percpu alloc for large map values with a warn:
WARNING: CPU: 3 PID: 2752 at ../mm/percpu.c:892 pcpu_alloc+0x119/0xa60
illegal size (32824) or align (8) for percpu allocation

The fixes for both of these issues are different for kmalloc and prealloc modes.
For prealloc mode allocate extra num_possible_cpus elements and store
their pointers into extra_elems array instead of actual elements.
Hence we can use these hidden(spare) elements not only when the map is full
but during bpf_map_update_elem() that replaces existing element too.
That also improves performance, since pcpu_freelist_pop/push is avoided.
Unfortunately this approach cannot be used for kmalloc mode which needs
to kfree elements after rcu grace period. Therefore switch it back to normal
kmalloc even when full and old element exists like it was prior to
commit 6c9059817432 ("bpf: pre-allocate hash map elements").

Add tests to check for over max_entries and large map values.

Reported-by: Dave Jones <davej@codemonkey.org.uk>
Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements")
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 kernel/bpf/hashtab.c                    | 144 ++++++++++++++++----------------
 tools/testing/selftests/bpf/test_maps.c |  29 ++++++-
 2 files changed, 97 insertions(+), 76 deletions(-)

(limited to 'tools/testing')

diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index afe5bab376c9..361a69dfe543 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -30,18 +30,12 @@ struct bpf_htab {
 		struct pcpu_freelist freelist;
 		struct bpf_lru lru;
 	};
-	void __percpu *extra_elems;
+	struct htab_elem *__percpu *extra_elems;
 	atomic_t count;	/* number of elements in this hashtable */
 	u32 n_buckets;	/* number of hash buckets */
 	u32 elem_size;	/* size of each element in bytes */
 };
 
-enum extra_elem_state {
-	HTAB_NOT_AN_EXTRA_ELEM = 0,
-	HTAB_EXTRA_ELEM_FREE,
-	HTAB_EXTRA_ELEM_USED
-};
-
 /* each htab element is struct htab_elem + key + value */
 struct htab_elem {
 	union {
@@ -56,7 +50,6 @@ struct htab_elem {
 	};
 	union {
 		struct rcu_head rcu;
-		enum extra_elem_state state;
 		struct bpf_lru_node lru_node;
 	};
 	u32 hash;
@@ -77,6 +70,11 @@ static bool htab_is_percpu(const struct bpf_htab *htab)
 		htab->map.map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
 }
 
+static bool htab_is_prealloc(const struct bpf_htab *htab)
+{
+	return !(htab->map.map_flags & BPF_F_NO_PREALLOC);
+}
+
 static inline void htab_elem_set_ptr(struct htab_elem *l, u32 key_size,
 				     void __percpu *pptr)
 {
@@ -128,17 +126,20 @@ static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
 
 static int prealloc_init(struct bpf_htab *htab)
 {
+	u32 num_entries = htab->map.max_entries;
 	int err = -ENOMEM, i;
 
-	htab->elems = bpf_map_area_alloc(htab->elem_size *
-					 htab->map.max_entries);
+	if (!htab_is_percpu(htab) && !htab_is_lru(htab))
+		num_entries += num_possible_cpus();
+
+	htab->elems = bpf_map_area_alloc(htab->elem_size * num_entries);
 	if (!htab->elems)
 		return -ENOMEM;
 
 	if (!htab_is_percpu(htab))
 		goto skip_percpu_elems;
 
-	for (i = 0; i < htab->map.max_entries; i++) {
+	for (i = 0; i < num_entries; i++) {
 		u32 size = round_up(htab->map.value_size, 8);
 		void __percpu *pptr;
 
@@ -166,11 +167,11 @@ skip_percpu_elems:
 	if (htab_is_lru(htab))
 		bpf_lru_populate(&htab->lru, htab->elems,
 				 offsetof(struct htab_elem, lru_node),
-				 htab->elem_size, htab->map.max_entries);
+				 htab->elem_size, num_entries);
 	else
 		pcpu_freelist_populate(&htab->freelist,
 				       htab->elems + offsetof(struct htab_elem, fnode),
-				       htab->elem_size, htab->map.max_entries);
+				       htab->elem_size, num_entries);
 
 	return 0;
 
@@ -191,16 +192,22 @@ static void prealloc_destroy(struct bpf_htab *htab)
 
 static int alloc_extra_elems(struct bpf_htab *htab)
 {
-	void __percpu *pptr;
+	struct htab_elem *__percpu *pptr, *l_new;
+	struct pcpu_freelist_node *l;
 	int cpu;
 
-	pptr = __alloc_percpu_gfp(htab->elem_size, 8, GFP_USER | __GFP_NOWARN);
+	pptr = __alloc_percpu_gfp(sizeof(struct htab_elem *), 8,
+				  GFP_USER | __GFP_NOWARN);
 	if (!pptr)
 		return -ENOMEM;
 
 	for_each_possible_cpu(cpu) {
-		((struct htab_elem *)per_cpu_ptr(pptr, cpu))->state =
-			HTAB_EXTRA_ELEM_FREE;
+		l = pcpu_freelist_pop(&htab->freelist);
+		/* pop will succeed, since prealloc_init()
+		 * preallocated extra num_possible_cpus elements
+		 */
+		l_new = container_of(l, struct htab_elem, fnode);
+		*per_cpu_ptr(pptr, cpu) = l_new;
 	}
 	htab->extra_elems = pptr;
 	return 0;
@@ -342,25 +349,25 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 		raw_spin_lock_init(&htab->buckets[i].lock);
 	}
 
-	if (!percpu && !lru) {
-		/* lru itself can remove the least used element, so
-		 * there is no need for an extra elem during map_update.
-		 */
-		err = alloc_extra_elems(htab);
-		if (err)
-			goto free_buckets;
-	}
-
 	if (prealloc) {
 		err = prealloc_init(htab);
 		if (err)
-			goto free_extra_elems;
+			goto free_buckets;
+
+		if (!percpu && !lru) {
+			/* lru itself can remove the least used element, so
+			 * there is no need for an extra elem during map_update.
+			 */
+			err = alloc_extra_elems(htab);
+			if (err)
+				goto free_prealloc;
+		}
 	}
 
 	return &htab->map;
 
-free_extra_elems:
-	free_percpu(htab->extra_elems);
+free_prealloc:
+	prealloc_destroy(htab);
 free_buckets:
 	bpf_map_area_free(htab->buckets);
 free_htab:
@@ -575,12 +582,7 @@ static void htab_elem_free_rcu(struct rcu_head *head)
 
 static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 {
-	if (l->state == HTAB_EXTRA_ELEM_USED) {
-		l->state = HTAB_EXTRA_ELEM_FREE;
-		return;
-	}
-
-	if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) {
+	if (htab_is_prealloc(htab)) {
 		pcpu_freelist_push(&htab->freelist, &l->fnode);
 	} else {
 		atomic_dec(&htab->count);
@@ -610,47 +612,43 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 					 void *value, u32 key_size, u32 hash,
 					 bool percpu, bool onallcpus,
-					 bool old_elem_exists)
+					 struct htab_elem *old_elem)
 {
 	u32 size = htab->map.value_size;
-	bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC);
-	struct htab_elem *l_new;
+	bool prealloc = htab_is_prealloc(htab);
+	struct htab_elem *l_new, **pl_new;
 	void __percpu *pptr;
-	int err = 0;
 
 	if (prealloc) {
-		struct pcpu_freelist_node *l;
+		if (old_elem) {
+			/* if we're updating the existing element,
+			 * use per-cpu extra elems to avoid freelist_pop/push
+			 */
+			pl_new = this_cpu_ptr(htab->extra_elems);
+			l_new = *pl_new;
+			*pl_new = old_elem;
+		} else {
+			struct pcpu_freelist_node *l;
 
-		l = pcpu_freelist_pop(&htab->freelist);
-		if (!l)
-			err = -E2BIG;
-		else
+			l = pcpu_freelist_pop(&htab->freelist);
+			if (!l)
+				return ERR_PTR(-E2BIG);
 			l_new = container_of(l, struct htab_elem, fnode);
-	} else {
-		if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
-			atomic_dec(&htab->count);
-			err = -E2BIG;
-		} else {
-			l_new = kmalloc(htab->elem_size,
-					GFP_ATOMIC | __GFP_NOWARN);
-			if (!l_new)
-				return ERR_PTR(-ENOMEM);
 		}
-	}
-
-	if (err) {
-		if (!old_elem_exists)
-			return ERR_PTR(err);
-
-		/* if we're updating the existing element and the hash table
-		 * is full, use per-cpu extra elems
-		 */
-		l_new = this_cpu_ptr(htab->extra_elems);
-		if (l_new->state != HTAB_EXTRA_ELEM_FREE)
-			return ERR_PTR(-E2BIG);
-		l_new->state = HTAB_EXTRA_ELEM_USED;
 	} else {
-		l_new->state = HTAB_NOT_AN_EXTRA_ELEM;
+		if (atomic_inc_return(&htab->count) > htab->map.max_entries)
+			if (!old_elem) {
+				/* when map is full and update() is replacing
+				 * old element, it's ok to allocate, since
+				 * old element will be freed immediately.
+				 * Otherwise return an error
+				 */
+				atomic_dec(&htab->count);
+				return ERR_PTR(-E2BIG);
+			}
+		l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN);
+		if (!l_new)
+			return ERR_PTR(-ENOMEM);
 	}
 
 	memcpy(l_new->key, key, key_size);
@@ -731,7 +729,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 		goto err;
 
 	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
-				!!l_old);
+				l_old);
 	if (IS_ERR(l_new)) {
 		/* all pre-allocated elements are in use or memory exhausted */
 		ret = PTR_ERR(l_new);
@@ -744,7 +742,8 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	hlist_nulls_add_head_rcu(&l_new->hash_node, head);
 	if (l_old) {
 		hlist_nulls_del_rcu(&l_old->hash_node);
-		free_htab_elem(htab, l_old);
+		if (!htab_is_prealloc(htab))
+			free_htab_elem(htab, l_old);
 	}
 	ret = 0;
 err:
@@ -856,7 +855,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 				value, onallcpus);
 	} else {
 		l_new = alloc_htab_elem(htab, key, value, key_size,
-					hash, true, onallcpus, false);
+					hash, true, onallcpus, NULL);
 		if (IS_ERR(l_new)) {
 			ret = PTR_ERR(l_new);
 			goto err;
@@ -1024,8 +1023,7 @@ static void delete_all_elements(struct bpf_htab *htab)
 
 		hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
 			hlist_nulls_del_rcu(&l->hash_node);
-			if (l->state != HTAB_EXTRA_ELEM_USED)
-				htab_elem_free(htab, l);
+			htab_elem_free(htab, l);
 		}
 	}
 }
@@ -1045,7 +1043,7 @@ static void htab_map_free(struct bpf_map *map)
 	 * not have executed. Wait for them.
 	 */
 	rcu_barrier();
-	if (htab->map.map_flags & BPF_F_NO_PREALLOC)
+	if (!htab_is_prealloc(htab))
 		delete_all_elements(htab);
 	else
 		prealloc_destroy(htab);
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index cada17ac00b8..a0aa2009b0e0 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -80,8 +80,9 @@ static void test_hashmap(int task, void *data)
 	assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == 0);
 	key = 2;
 	assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
-	key = 1;
-	assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
+	key = 3;
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+	       errno == E2BIG);
 
 	/* Check that key = 0 doesn't exist. */
 	key = 0;
@@ -110,6 +111,24 @@ static void test_hashmap(int task, void *data)
 	close(fd);
 }
 
+static void test_hashmap_sizes(int task, void *data)
+{
+	int fd, i, j;
+
+	for (i = 1; i <= 512; i <<= 1)
+		for (j = 1; j <= 1 << 18; j <<= 1) {
+			fd = bpf_create_map(BPF_MAP_TYPE_HASH, i, j,
+					    2, map_flags);
+			if (fd < 0) {
+				printf("Failed to create hashmap key=%d value=%d '%s'\n",
+				       i, j, strerror(errno));
+				exit(1);
+			}
+			close(fd);
+			usleep(10); /* give kernel time to destroy */
+		}
+}
+
 static void test_hashmap_percpu(int task, void *data)
 {
 	unsigned int nr_cpus = bpf_num_possible_cpus();
@@ -317,7 +336,10 @@ static void test_arraymap_percpu(int task, void *data)
 static void test_arraymap_percpu_many_keys(void)
 {
 	unsigned int nr_cpus = bpf_num_possible_cpus();
-	unsigned int nr_keys = 20000;
+	/* nr_keys is not too large otherwise the test stresses percpu
+	 * allocator more than anything else
+	 */
+	unsigned int nr_keys = 2000;
 	long values[nr_cpus];
 	int key, fd, i;
 
@@ -419,6 +441,7 @@ static void test_map_stress(void)
 {
 	run_parallel(100, test_hashmap, NULL);
 	run_parallel(100, test_hashmap_percpu, NULL);
+	run_parallel(100, test_hashmap_sizes, NULL);
 
 	run_parallel(100, test_arraymap, NULL);
 	run_parallel(100, test_arraymap_percpu, NULL);
-- 
cgit v1.2.3