summaryrefslogtreecommitdiff
path: root/tools/testing
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-10-11 03:53:04 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2022-10-11 03:53:04 +0300
commit27bc50fc90647bbf7b734c3fc306a5e61350da53 (patch)
tree75fc525fbfec8c07a97a7875a89592317bcad4ca /tools/testing
parent70442fc54e6889a2a77f0e9554e8188a1557f00e (diff)
parentbbff39cc6cbcb86ccfacb2dcafc79912a9f9df69 (diff)
downloadlinux-27bc50fc90647bbf7b734c3fc306a5e61350da53.tar.xz
Merge tag 'mm-stable-2022-10-08' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton: - Yu Zhao's Multi-Gen LRU patches are here. They've been under test in linux-next for a couple of months without, to my knowledge, any negative reports (or any positive ones, come to that). - Also the Maple Tree from Liam Howlett. An overlapping range-based tree for vmas. It it apparently slightly more efficient in its own right, but is mainly targeted at enabling work to reduce mmap_lock contention. Liam has identified a number of other tree users in the kernel which could be beneficially onverted to mapletrees. Yu Zhao has identified a hard-to-hit but "easy to fix" lockdep splat at [1]. This has yet to be addressed due to Liam's unfortunately timed vacation. He is now back and we'll get this fixed up. - Dmitry Vyukov introduces KMSAN: the Kernel Memory Sanitizer. It uses clang-generated instrumentation to detect used-unintialized bugs down to the single bit level. KMSAN keeps finding bugs. New ones, as well as the legacy ones. - Yang Shi adds a userspace mechanism (madvise) to induce a collapse of memory into THPs. - Zach O'Keefe has expanded Yang Shi's madvise(MADV_COLLAPSE) to support file/shmem-backed pages. - userfaultfd updates from Axel Rasmussen - zsmalloc cleanups from Alexey Romanov - cleanups from Miaohe Lin: vmscan, hugetlb_cgroup, hugetlb and memory-failure - Huang Ying adds enhancements to NUMA balancing memory tiering mode's page promotion, with a new way of detecting hot pages. - memcg updates from Shakeel Butt: charging optimizations and reduced memory consumption. - memcg cleanups from Kairui Song. - memcg fixes and cleanups from Johannes Weiner. - Vishal Moola provides more folio conversions - Zhang Yi removed ll_rw_block() :( - migration enhancements from Peter Xu - migration error-path bugfixes from Huang Ying - Aneesh Kumar added ability for a device driver to alter the memory tiering promotion paths. For optimizations by PMEM drivers, DRM drivers, etc. - vma merging improvements from Jakub Matěn. - NUMA hinting cleanups from David Hildenbrand. - xu xin added aditional userspace visibility into KSM merging activity. - THP & KSM code consolidation from Qi Zheng. - more folio work from Matthew Wilcox. - KASAN updates from Andrey Konovalov. - DAMON cleanups from Kaixu Xia. - DAMON work from SeongJae Park: fixes, cleanups. - hugetlb sysfs cleanups from Muchun Song. - Mike Kravetz fixes locking issues in hugetlbfs and in hugetlb core. Link: https://lkml.kernel.org/r/CAOUHufZabH85CeUN-MEMgL8gJGzJEWUrkiM58JkTbBhh-jew0Q@mail.gmail.com [1] * tag 'mm-stable-2022-10-08' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (555 commits) hugetlb: allocate vma lock for all sharable vmas hugetlb: take hugetlb vma_lock when clearing vma_lock->vma pointer hugetlb: fix vma lock handling during split vma and range unmapping mglru: mm/vmscan.c: fix imprecise comments mm/mglru: don't sync disk for each aging cycle mm: memcontrol: drop dead CONFIG_MEMCG_SWAP config symbol mm: memcontrol: use do_memsw_account() in a few more places mm: memcontrol: deprecate swapaccounting=0 mode mm: memcontrol: don't allocate cgroup swap arrays when memcg is disabled mm/secretmem: remove reduntant return value mm/hugetlb: add available_huge_pages() func mm: remove unused inline functions from include/linux/mm_inline.h selftests/vm: add selftest for MADV_COLLAPSE of uffd-minor memory selftests/vm: add file/shmem MADV_COLLAPSE selftest for cleared pmd selftests/vm: add thp collapse shmem testing selftests/vm: add thp collapse file and tmpfs testing selftests/vm: modularize thp collapse memory operations selftests/vm: dedup THP helpers mm/khugepaged: add tracepoint to hpage_collapse_scan_file() mm/madvise: add file and shmem support to MADV_COLLAPSE ...
Diffstat (limited to 'tools/testing')
-rw-r--r--tools/testing/memblock/linux/mmzone.h2
-rw-r--r--tools/testing/radix-tree/.gitignore2
-rw-r--r--tools/testing/radix-tree/Makefile9
-rw-r--r--tools/testing/radix-tree/generated/autoconf.h1
-rw-r--r--tools/testing/radix-tree/linux.c160
-rw-r--r--tools/testing/radix-tree/linux/kernel.h1
-rw-r--r--tools/testing/radix-tree/linux/lockdep.h2
-rw-r--r--tools/testing/radix-tree/linux/maple_tree.h7
-rw-r--r--tools/testing/radix-tree/maple.c59
-rw-r--r--tools/testing/radix-tree/trace/events/maple_tree.h5
-rw-r--r--tools/testing/selftests/cgroup/config1
-rw-r--r--tools/testing/selftests/damon/Makefile1
-rw-r--r--tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh27
-rw-r--r--tools/testing/selftests/vm/.gitignore1
-rw-r--r--tools/testing/selftests/vm/Makefile23
-rw-r--r--tools/testing/selftests/vm/check_config.sh31
-rw-r--r--tools/testing/selftests/vm/hmm-tests.c108
-rw-r--r--tools/testing/selftests/vm/khugepaged.c1195
-rw-r--r--tools/testing/selftests/vm/mremap_test.c49
-rwxr-xr-xtools/testing/selftests/vm/run_vmtests.sh15
-rw-r--r--tools/testing/selftests/vm/soft-dirty.c2
-rw-r--r--tools/testing/selftests/vm/split_huge_page_test.c12
-rwxr-xr-xtools/testing/selftests/vm/test_hmm.sh10
-rw-r--r--tools/testing/selftests/vm/userfaultfd.c247
-rw-r--r--tools/testing/selftests/vm/vm_util.c36
-rw-r--r--tools/testing/selftests/vm/vm_util.h5
26 files changed, 1500 insertions, 511 deletions
diff --git a/tools/testing/memblock/linux/mmzone.h b/tools/testing/memblock/linux/mmzone.h
index 7c2eb5c9bb54..e65f89b12f1c 100644
--- a/tools/testing/memblock/linux/mmzone.h
+++ b/tools/testing/memblock/linux/mmzone.h
@@ -22,6 +22,8 @@ enum zone_type {
#define pageblock_order (MAX_ORDER - 1)
#define pageblock_nr_pages BIT(pageblock_order)
+#define pageblock_align(pfn) ALIGN((pfn), pageblock_nr_pages)
+#define pageblock_start_pfn(pfn) ALIGN_DOWN((pfn), pageblock_nr_pages)
struct zone {
atomic_long_t managed_pages;
diff --git a/tools/testing/radix-tree/.gitignore b/tools/testing/radix-tree/.gitignore
index d971516401e6..c901d96dd013 100644
--- a/tools/testing/radix-tree/.gitignore
+++ b/tools/testing/radix-tree/.gitignore
@@ -6,3 +6,5 @@ main
multiorder
radix-tree.c
xarray
+maple
+ma_xa_benchmark
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index c4ea4fbb0bfc..89d613e0505b 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -4,9 +4,9 @@ CFLAGS += -I. -I../../include -g -Og -Wall -D_LGPL_SOURCE -fsanitize=address \
-fsanitize=undefined
LDFLAGS += -fsanitize=address -fsanitize=undefined
LDLIBS+= -lpthread -lurcu
-TARGETS = main idr-test multiorder xarray
+TARGETS = main idr-test multiorder xarray maple
CORE_OFILES := xarray.o radix-tree.o idr.o linux.o test.o find_bit.o bitmap.o \
- slab.o
+ slab.o maple.o
OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \
regression4.o tag_check.o multiorder.o idr-test.o iteration_check.o \
iteration_check_2.o benchmark.o
@@ -29,6 +29,8 @@ idr-test: idr-test.o $(CORE_OFILES)
xarray: $(CORE_OFILES)
+maple: $(CORE_OFILES)
+
multiorder: multiorder.o $(CORE_OFILES)
clean:
@@ -40,6 +42,7 @@ $(OFILES): Makefile *.h */*.h generated/map-shift.h \
../../include/linux/*.h \
../../include/asm/*.h \
../../../include/linux/xarray.h \
+ ../../../include/linux/maple_tree.h \
../../../include/linux/radix-tree.h \
../../../include/linux/idr.h
@@ -51,6 +54,8 @@ idr.c: ../../../lib/idr.c
xarray.o: ../../../lib/xarray.c ../../../lib/test_xarray.c
+maple.o: ../../../lib/maple_tree.c ../../../lib/test_maple_tree.c
+
generated/map-shift.h:
@if ! grep -qws $(SHIFT) generated/map-shift.h; then \
echo "#define XA_CHUNK_SHIFT $(SHIFT)" > \
diff --git a/tools/testing/radix-tree/generated/autoconf.h b/tools/testing/radix-tree/generated/autoconf.h
index 2218b3cc184e..e7da80350236 100644
--- a/tools/testing/radix-tree/generated/autoconf.h
+++ b/tools/testing/radix-tree/generated/autoconf.h
@@ -1 +1,2 @@
#define CONFIG_XARRAY_MULTI 1
+#define CONFIG_64BIT 1
diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c
index d5c1bcba86fe..2048d12c31df 100644
--- a/tools/testing/radix-tree/linux.c
+++ b/tools/testing/radix-tree/linux.c
@@ -23,15 +23,47 @@ struct kmem_cache {
int nr_objs;
void *objs;
void (*ctor)(void *);
+ unsigned int non_kernel;
+ unsigned long nr_allocated;
+ unsigned long nr_tallocated;
};
+void kmem_cache_set_non_kernel(struct kmem_cache *cachep, unsigned int val)
+{
+ cachep->non_kernel = val;
+}
+
+unsigned long kmem_cache_get_alloc(struct kmem_cache *cachep)
+{
+ return cachep->size * cachep->nr_allocated;
+}
+
+unsigned long kmem_cache_nr_allocated(struct kmem_cache *cachep)
+{
+ return cachep->nr_allocated;
+}
+
+unsigned long kmem_cache_nr_tallocated(struct kmem_cache *cachep)
+{
+ return cachep->nr_tallocated;
+}
+
+void kmem_cache_zero_nr_tallocated(struct kmem_cache *cachep)
+{
+ cachep->nr_tallocated = 0;
+}
+
void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
int gfp)
{
void *p;
- if (!(gfp & __GFP_DIRECT_RECLAIM))
- return NULL;
+ if (!(gfp & __GFP_DIRECT_RECLAIM)) {
+ if (!cachep->non_kernel)
+ return NULL;
+
+ cachep->non_kernel--;
+ }
pthread_mutex_lock(&cachep->lock);
if (cachep->nr_objs) {
@@ -53,19 +85,21 @@ void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
memset(p, 0, cachep->size);
}
+ uatomic_inc(&cachep->nr_allocated);
uatomic_inc(&nr_allocated);
+ uatomic_inc(&cachep->nr_tallocated);
if (kmalloc_verbose)
printf("Allocating %p from slab\n", p);
return p;
}
-void kmem_cache_free(struct kmem_cache *cachep, void *objp)
+void kmem_cache_free_locked(struct kmem_cache *cachep, void *objp)
{
assert(objp);
uatomic_dec(&nr_allocated);
+ uatomic_dec(&cachep->nr_allocated);
if (kmalloc_verbose)
printf("Freeing %p to slab\n", objp);
- pthread_mutex_lock(&cachep->lock);
if (cachep->nr_objs > 10 || cachep->align) {
memset(objp, POISON_FREE, cachep->size);
free(objp);
@@ -75,9 +109,80 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
node->parent = cachep->objs;
cachep->objs = node;
}
+}
+
+void kmem_cache_free(struct kmem_cache *cachep, void *objp)
+{
+ pthread_mutex_lock(&cachep->lock);
+ kmem_cache_free_locked(cachep, objp);
pthread_mutex_unlock(&cachep->lock);
}
+void kmem_cache_free_bulk(struct kmem_cache *cachep, size_t size, void **list)
+{
+ if (kmalloc_verbose)
+ pr_debug("Bulk free %p[0-%lu]\n", list, size - 1);
+
+ pthread_mutex_lock(&cachep->lock);
+ for (int i = 0; i < size; i++)
+ kmem_cache_free_locked(cachep, list[i]);
+ pthread_mutex_unlock(&cachep->lock);
+}
+
+int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size,
+ void **p)
+{
+ size_t i;
+
+ if (kmalloc_verbose)
+ pr_debug("Bulk alloc %lu\n", size);
+
+ if (!(gfp & __GFP_DIRECT_RECLAIM)) {
+ if (cachep->non_kernel < size)
+ return 0;
+
+ cachep->non_kernel -= size;
+ }
+
+ pthread_mutex_lock(&cachep->lock);
+ if (cachep->nr_objs >= size) {
+ struct radix_tree_node *node;
+
+ for (i = 0; i < size; i++) {
+ node = cachep->objs;
+ cachep->nr_objs--;
+ cachep->objs = node->parent;
+ p[i] = node;
+ node->parent = NULL;
+ }
+ pthread_mutex_unlock(&cachep->lock);
+ } else {
+ pthread_mutex_unlock(&cachep->lock);
+ for (i = 0; i < size; i++) {
+ if (cachep->align) {
+ posix_memalign(&p[i], cachep->align,
+ cachep->size * size);
+ } else {
+ p[i] = malloc(cachep->size * size);
+ }
+ if (cachep->ctor)
+ cachep->ctor(p[i]);
+ else if (gfp & __GFP_ZERO)
+ memset(p[i], 0, cachep->size);
+ }
+ }
+
+ for (i = 0; i < size; i++) {
+ uatomic_inc(&nr_allocated);
+ uatomic_inc(&cachep->nr_allocated);
+ uatomic_inc(&cachep->nr_tallocated);
+ if (kmalloc_verbose)
+ printf("Allocating %p from slab\n", p[i]);
+ }
+
+ return size;
+}
+
struct kmem_cache *
kmem_cache_create(const char *name, unsigned int size, unsigned int align,
unsigned int flags, void (*ctor)(void *))
@@ -88,7 +193,54 @@ kmem_cache_create(const char *name, unsigned int size, unsigned int align,
ret->size = size;
ret->align = align;
ret->nr_objs = 0;
+ ret->nr_allocated = 0;
+ ret->nr_tallocated = 0;
ret->objs = NULL;
ret->ctor = ctor;
+ ret->non_kernel = 0;
return ret;
}
+
+/*
+ * Test the test infrastructure for kem_cache_alloc/free and bulk counterparts.
+ */
+void test_kmem_cache_bulk(void)
+{
+ int i;
+ void *list[12];
+ static struct kmem_cache *test_cache, *test_cache2;
+
+ /*
+ * Testing the bulk allocators without aligned kmem_cache to force the
+ * bulk alloc/free to reuse
+ */
+ test_cache = kmem_cache_create("test_cache", 256, 0, SLAB_PANIC, NULL);
+
+ for (i = 0; i < 5; i++)
+ list[i] = kmem_cache_alloc(test_cache, __GFP_DIRECT_RECLAIM);
+
+ for (i = 0; i < 5; i++)
+ kmem_cache_free(test_cache, list[i]);
+ assert(test_cache->nr_objs == 5);
+
+ kmem_cache_alloc_bulk(test_cache, __GFP_DIRECT_RECLAIM, 5, list);
+ kmem_cache_free_bulk(test_cache, 5, list);
+
+ for (i = 0; i < 12 ; i++)
+ list[i] = kmem_cache_alloc(test_cache, __GFP_DIRECT_RECLAIM);
+
+ for (i = 0; i < 12; i++)
+ kmem_cache_free(test_cache, list[i]);
+
+ /* The last free will not be kept around */
+ assert(test_cache->nr_objs == 11);
+
+ /* Aligned caches will immediately free */
+ test_cache2 = kmem_cache_create("test_cache2", 128, 128, SLAB_PANIC, NULL);
+
+ kmem_cache_alloc_bulk(test_cache2, __GFP_DIRECT_RECLAIM, 10, list);
+ kmem_cache_free_bulk(test_cache2, 10, list);
+ assert(!test_cache2->nr_objs);
+
+
+}
diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h
index 39867fd80c8f..c5c9d05f29da 100644
--- a/tools/testing/radix-tree/linux/kernel.h
+++ b/tools/testing/radix-tree/linux/kernel.h
@@ -14,6 +14,7 @@
#include "../../../include/linux/kconfig.h"
#define printk printf
+#define pr_err printk
#define pr_info printk
#define pr_debug printk
#define pr_cont printk
diff --git a/tools/testing/radix-tree/linux/lockdep.h b/tools/testing/radix-tree/linux/lockdep.h
index 016cff473cfc..62473ab57f99 100644
--- a/tools/testing/radix-tree/linux/lockdep.h
+++ b/tools/testing/radix-tree/linux/lockdep.h
@@ -11,4 +11,6 @@ static inline void lockdep_set_class(spinlock_t *lock,
struct lock_class_key *key)
{
}
+
+extern int lockdep_is_held(const void *);
#endif /* _LINUX_LOCKDEP_H */
diff --git a/tools/testing/radix-tree/linux/maple_tree.h b/tools/testing/radix-tree/linux/maple_tree.h
new file mode 100644
index 000000000000..7d8d1f445b89
--- /dev/null
+++ b/tools/testing/radix-tree/linux/maple_tree.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+#define atomic_t int32_t
+#include "../../../../include/linux/maple_tree.h"
+#define atomic_inc(x) uatomic_inc(x)
+#define atomic_read(x) uatomic_read(x)
+#define atomic_set(x, y) do {} while (0)
+#define U8_MAX UCHAR_MAX
diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c
new file mode 100644
index 000000000000..35082671928a
--- /dev/null
+++ b/tools/testing/radix-tree/maple.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * maple_tree.c: Userspace shim for maple tree test-suite
+ * Copyright (c) 2018 Liam R. Howlett <Liam.Howlett@Oracle.com>
+ */
+
+#define CONFIG_DEBUG_MAPLE_TREE
+#define CONFIG_MAPLE_SEARCH
+#include "test.h"
+
+#define module_init(x)
+#define module_exit(x)
+#define MODULE_AUTHOR(x)
+#define MODULE_LICENSE(x)
+#define dump_stack() assert(0)
+
+#include "../../../lib/maple_tree.c"
+#undef CONFIG_DEBUG_MAPLE_TREE
+#include "../../../lib/test_maple_tree.c"
+
+void farmer_tests(void)
+{
+ struct maple_node *node;
+ DEFINE_MTREE(tree);
+
+ mt_dump(&tree);
+
+ tree.ma_root = xa_mk_value(0);
+ mt_dump(&tree);
+
+ node = mt_alloc_one(GFP_KERNEL);
+ node->parent = (void *)((unsigned long)(&tree) | 1);
+ node->slot[0] = xa_mk_value(0);
+ node->slot[1] = xa_mk_value(1);
+ node->mr64.pivot[0] = 0;
+ node->mr64.pivot[1] = 1;
+ node->mr64.pivot[2] = 0;
+ tree.ma_root = mt_mk_node(node, maple_leaf_64);
+ mt_dump(&tree);
+
+ ma_free_rcu(node);
+}
+
+void maple_tree_tests(void)
+{
+ farmer_tests();
+ maple_tree_seed();
+ maple_tree_harvest();
+}
+
+int __weak main(void)
+{
+ maple_tree_init();
+ maple_tree_tests();
+ rcu_barrier();
+ if (nr_allocated)
+ printf("nr_allocated = %d\n", nr_allocated);
+ return 0;
+}
diff --git a/tools/testing/radix-tree/trace/events/maple_tree.h b/tools/testing/radix-tree/trace/events/maple_tree.h
new file mode 100644
index 000000000000..97d0e1ddcf08
--- /dev/null
+++ b/tools/testing/radix-tree/trace/events/maple_tree.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#define trace_ma_op(a, b) do {} while (0)
+#define trace_ma_read(a, b) do {} while (0)
+#define trace_ma_write(a, b, c, d) do {} while (0)
diff --git a/tools/testing/selftests/cgroup/config b/tools/testing/selftests/cgroup/config
index 84fe884fad86..97d549ee894f 100644
--- a/tools/testing/selftests/cgroup/config
+++ b/tools/testing/selftests/cgroup/config
@@ -4,5 +4,4 @@ CONFIG_CGROUP_FREEZER=y
CONFIG_CGROUP_SCHED=y
CONFIG_MEMCG=y
CONFIG_MEMCG_KMEM=y
-CONFIG_MEMCG_SWAP=y
CONFIG_PAGE_COUNTER=y
diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile
index 0470c5f3e690..a1fa2eff8192 100644
--- a/tools/testing/selftests/damon/Makefile
+++ b/tools/testing/selftests/damon/Makefile
@@ -6,6 +6,7 @@ TEST_GEN_FILES += huge_count_read_write
TEST_FILES = _chk_dependency.sh _debugfs_common.sh
TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh
TEST_PROGS += debugfs_empty_targets.sh debugfs_huge_count_read_write.sh
+TEST_PROGS += debugfs_duplicate_context_creation.sh
TEST_PROGS += sysfs.sh
include ../lib.mk
diff --git a/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh b/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh
new file mode 100644
index 000000000000..4a76e37ef16b
--- /dev/null
+++ b/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source _debugfs_common.sh
+
+# Test duplicated context creation
+# ================================
+
+if ! echo foo > "$DBGFS/mk_contexts"
+then
+ echo "context creation failed"
+ exit 1
+fi
+
+if echo foo > "$DBGFS/mk_contexts"
+then
+ echo "duplicate context creation success"
+ exit 1
+fi
+
+if ! echo foo > "$DBGFS/rm_contexts"
+then
+ echo "context deletion failed"
+ exit 1
+fi
+
+exit 0
diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index 31e5eea2a9b9..7b9dc2426f18 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -30,7 +30,6 @@ map_fixed_noreplace
write_to_hugetlbfs
hmm-tests
memfd_secret
-local_config.*
soft-dirty
split_huge_page_test
ksm_tests
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index d516b8c38eed..163c2fde3cb3 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -1,9 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for vm selftests
-LOCAL_HDRS += $(selfdir)/vm/local_config.h $(top_srcdir)/mm/gup_test.h
-
-include local_config.mk
+LOCAL_HDRS += $(top_srcdir)/mm/gup_test.h
uname_M := $(shell uname -m 2>/dev/null || echo not)
MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/' -e 's/ppc64.*/ppc64/')
@@ -97,9 +95,11 @@ TEST_FILES += va_128TBswitch.sh
include ../lib.mk
+$(OUTPUT)/khugepaged: vm_util.c
$(OUTPUT)/madv_populate: vm_util.c
$(OUTPUT)/soft-dirty: vm_util.c
$(OUTPUT)/split_huge_page_test: vm_util.c
+$(OUTPUT)/userfaultfd: vm_util.c
ifeq ($(MACHINE),x86_64)
BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32))
@@ -152,23 +152,6 @@ endif
$(OUTPUT)/mlock-random-test $(OUTPUT)/memfd_secret: LDLIBS += -lcap
-# HMM_EXTRA_LIBS may get set in local_config.mk, or it may be left empty.
-$(OUTPUT)/hmm-tests: LDLIBS += $(HMM_EXTRA_LIBS)
-
$(OUTPUT)/ksm_tests: LDLIBS += -lnuma
$(OUTPUT)/migration: LDLIBS += -lnuma
-
-local_config.mk local_config.h: check_config.sh
- /bin/sh ./check_config.sh $(CC)
-
-EXTRA_CLEAN += local_config.mk local_config.h
-
-ifeq ($(HMM_EXTRA_LIBS),)
-all: warn_missing_hugelibs
-
-warn_missing_hugelibs:
- @echo ; \
- echo "Warning: missing libhugetlbfs support. Some HMM tests will be skipped." ; \
- echo
-endif
diff --git a/tools/testing/selftests/vm/check_config.sh b/tools/testing/selftests/vm/check_config.sh
deleted file mode 100644
index 079c8a40b85d..000000000000
--- a/tools/testing/selftests/vm/check_config.sh
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-#
-# Probe for libraries and create header files to record the results. Both C
-# header files and Makefile include fragments are created.
-
-OUTPUT_H_FILE=local_config.h
-OUTPUT_MKFILE=local_config.mk
-
-# libhugetlbfs
-tmpname=$(mktemp)
-tmpfile_c=${tmpname}.c
-tmpfile_o=${tmpname}.o
-
-echo "#include <sys/types.h>" > $tmpfile_c
-echo "#include <hugetlbfs.h>" >> $tmpfile_c
-echo "int func(void) { return 0; }" >> $tmpfile_c
-
-CC=${1:?"Usage: $0 <compiler> # example compiler: gcc"}
-$CC -c $tmpfile_c -o $tmpfile_o >/dev/null 2>&1
-
-if [ -f $tmpfile_o ]; then
- echo "#define LOCAL_CONFIG_HAVE_LIBHUGETLBFS 1" > $OUTPUT_H_FILE
- echo "HMM_EXTRA_LIBS = -lhugetlbfs" > $OUTPUT_MKFILE
-else
- echo "// No libhugetlbfs support found" > $OUTPUT_H_FILE
- echo "# No libhugetlbfs support found, so:" > $OUTPUT_MKFILE
- echo "HMM_EXTRA_LIBS = " >> $OUTPUT_MKFILE
-fi
-
-rm ${tmpname}.*
diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
index 98b949c279be..7d722265dcd7 100644
--- a/tools/testing/selftests/vm/hmm-tests.c
+++ b/tools/testing/selftests/vm/hmm-tests.c
@@ -26,10 +26,6 @@
#include <sys/mman.h>
#include <sys/ioctl.h>
-#include "./local_config.h"
-#ifdef LOCAL_CONFIG_HAVE_LIBHUGETLBFS
-#include <hugetlbfs.h>
-#endif
/*
* This is a private UAPI to the kernel test module so it isn't exported
@@ -733,7 +729,54 @@ TEST_F(hmm, anon_write_huge)
hmm_buffer_free(buffer);
}
-#ifdef LOCAL_CONFIG_HAVE_LIBHUGETLBFS
+/*
+ * Read numeric data from raw and tagged kernel status files. Used to read
+ * /proc and /sys data (without a tag) and from /proc/meminfo (with a tag).
+ */
+static long file_read_ulong(char *file, const char *tag)
+{
+ int fd;
+ char buf[2048];
+ int len;
+ char *p, *q;
+ long val;
+
+ fd = open(file, O_RDONLY);
+ if (fd < 0) {
+ /* Error opening the file */
+ return -1;
+ }
+
+ len = read(fd, buf, sizeof(buf));
+ close(fd);
+ if (len < 0) {
+ /* Error in reading the file */
+ return -1;
+ }
+ if (len == sizeof(buf)) {
+ /* Error file is too large */
+ return -1;
+ }
+ buf[len] = '\0';
+
+ /* Search for a tag if provided */
+ if (tag) {
+ p = strstr(buf, tag);
+ if (!p)
+ return -1; /* looks like the line we want isn't there */
+ p += strlen(tag);
+ } else
+ p = buf;
+
+ val = strtol(p, &q, 0);
+ if (*q != ' ') {
+ /* Error parsing the file */
+ return -1;
+ }
+
+ return val;
+}
+
/*
* Write huge TLBFS page.
*/
@@ -742,29 +785,27 @@ TEST_F(hmm, anon_write_hugetlbfs)
struct hmm_buffer *buffer;
unsigned long npages;
unsigned long size;
+ unsigned long default_hsize;
unsigned long i;
int *ptr;
int ret;
- long pagesizes[4];
- int n, idx;
-
- /* Skip test if we can't allocate a hugetlbfs page. */
- n = gethugepagesizes(pagesizes, 4);
- if (n <= 0)
+ default_hsize = file_read_ulong("/proc/meminfo", "Hugepagesize:");
+ if (default_hsize < 0 || default_hsize*1024 < default_hsize)
SKIP(return, "Huge page size could not be determined");
- for (idx = 0; --n > 0; ) {
- if (pagesizes[n] < pagesizes[idx])
- idx = n;
- }
- size = ALIGN(TWOMEG, pagesizes[idx]);
+ default_hsize = default_hsize*1024; /* KB to B */
+
+ size = ALIGN(TWOMEG, default_hsize);
npages = size >> self->page_shift;
buffer = malloc(sizeof(*buffer));
ASSERT_NE(buffer, NULL);
- buffer->ptr = get_hugepage_region(size, GHR_STRICT);
- if (buffer->ptr == NULL) {
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+ -1, 0);
+ if (buffer->ptr == MAP_FAILED) {
free(buffer);
SKIP(return, "Huge page could not be allocated");
}
@@ -788,11 +829,10 @@ TEST_F(hmm, anon_write_hugetlbfs)
for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
ASSERT_EQ(ptr[i], i);
- free_hugepage_region(buffer->ptr);
+ munmap(buffer->ptr, buffer->size);
buffer->ptr = NULL;
hmm_buffer_free(buffer);
}
-#endif /* LOCAL_CONFIG_HAVE_LIBHUGETLBFS */
/*
* Read mmap'ed file memory.
@@ -1467,7 +1507,6 @@ TEST_F(hmm2, snapshot)
hmm_buffer_free(buffer);
}
-#ifdef LOCAL_CONFIG_HAVE_LIBHUGETLBFS
/*
* Test the hmm_range_fault() HMM_PFN_PMD flag for large pages that
* should be mapped by a large page table entry.
@@ -1477,30 +1516,30 @@ TEST_F(hmm, compound)
struct hmm_buffer *buffer;
unsigned long npages;
unsigned long size;
+ unsigned long default_hsize;
int *ptr;
unsigned char *m;
int ret;
- long pagesizes[4];
- int n, idx;
unsigned long i;
/* Skip test if we can't allocate a hugetlbfs page. */
- n = gethugepagesizes(pagesizes, 4);
- if (n <= 0)
- return;
- for (idx = 0; --n > 0; ) {
- if (pagesizes[n] < pagesizes[idx])
- idx = n;
- }
- size = ALIGN(TWOMEG, pagesizes[idx]);
+ default_hsize = file_read_ulong("/proc/meminfo", "Hugepagesize:");
+ if (default_hsize < 0 || default_hsize*1024 < default_hsize)
+ SKIP(return, "Huge page size could not be determined");
+ default_hsize = default_hsize*1024; /* KB to B */
+
+ size = ALIGN(TWOMEG, default_hsize);
npages = size >> self->page_shift;
buffer = malloc(sizeof(*buffer));
ASSERT_NE(buffer, NULL);
- buffer->ptr = get_hugepage_region(size, GHR_STRICT);
- if (buffer->ptr == NULL) {
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+ -1, 0);
+ if (buffer->ptr == MAP_FAILED) {
free(buffer);
return;
}
@@ -1539,11 +1578,10 @@ TEST_F(hmm, compound)
ASSERT_EQ(m[i], HMM_DMIRROR_PROT_READ |
HMM_DMIRROR_PROT_PMD);
- free_hugepage_region(buffer->ptr);
+ munmap(buffer->ptr, buffer->size);
buffer->ptr = NULL;
hmm_buffer_free(buffer);
}
-#endif /* LOCAL_CONFIG_HAVE_LIBHUGETLBFS */
/*
* Test two devices reading the same memory (double mapped).
diff --git a/tools/testing/selftests/vm/khugepaged.c b/tools/testing/selftests/vm/khugepaged.c
index 155120b67a16..64126c8cd561 100644
--- a/tools/testing/selftests/vm/khugepaged.c
+++ b/tools/testing/selftests/vm/khugepaged.c
@@ -1,6 +1,9 @@
#define _GNU_SOURCE
+#include <ctype.h>
+#include <errno.h>
#include <fcntl.h>
#include <limits.h>
+#include <dirent.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
@@ -10,10 +13,24 @@
#include <sys/mman.h>
#include <sys/wait.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <sys/vfs.h>
+
+#include "linux/magic.h"
+
+#include "vm_util.h"
#ifndef MADV_PAGEOUT
#define MADV_PAGEOUT 21
#endif
+#ifndef MADV_POPULATE_READ
+#define MADV_POPULATE_READ 22
+#endif
+#ifndef MADV_COLLAPSE
+#define MADV_COLLAPSE 25
+#endif
#define BASE_ADDR ((void *)(1UL << 30))
static unsigned long hpage_pmd_size;
@@ -22,6 +39,47 @@ static int hpage_pmd_nr;
#define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/"
#define PID_SMAPS "/proc/self/smaps"
+#define TEST_FILE "collapse_test_file"
+
+#define MAX_LINE_LENGTH 500
+
+enum vma_type {
+ VMA_ANON,
+ VMA_FILE,
+ VMA_SHMEM,
+};
+
+struct mem_ops {
+ void *(*setup_area)(int nr_hpages);
+ void (*cleanup_area)(void *p, unsigned long size);
+ void (*fault)(void *p, unsigned long start, unsigned long end);
+ bool (*check_huge)(void *addr, int nr_hpages);
+ const char *name;
+};
+
+static struct mem_ops *file_ops;
+static struct mem_ops *anon_ops;
+static struct mem_ops *shmem_ops;
+
+struct collapse_context {
+ void (*collapse)(const char *msg, char *p, int nr_hpages,
+ struct mem_ops *ops, bool expect);
+ bool enforce_pte_scan_limits;
+ const char *name;
+};
+
+static struct collapse_context *khugepaged_context;
+static struct collapse_context *madvise_context;
+
+struct file_info {
+ const char *dir;
+ char path[PATH_MAX];
+ enum vma_type type;
+ int fd;
+ char dev_queue_read_ahead_path[PATH_MAX];
+};
+
+static struct file_info finfo;
enum thp_enabled {
THP_ALWAYS,
@@ -88,18 +146,7 @@ struct settings {
enum shmem_enabled shmem_enabled;
bool use_zero_page;
struct khugepaged_settings khugepaged;
-};
-
-static struct settings default_settings = {
- .thp_enabled = THP_MADVISE,
- .thp_defrag = THP_DEFRAG_ALWAYS,
- .shmem_enabled = SHMEM_NEVER,
- .use_zero_page = 0,
- .khugepaged = {
- .defrag = 1,
- .alloc_sleep_millisecs = 10,
- .scan_sleep_millisecs = 10,
- },
+ unsigned long read_ahead_kb;
};
static struct settings saved_settings;
@@ -118,6 +165,11 @@ static void fail(const char *msg)
exit_status++;
}
+static void skip(const char *msg)
+{
+ printf(" \e[33m%s\e[0m\n", msg);
+}
+
static int read_file(const char *path, char *buf, size_t buflen)
{
int fd;
@@ -145,13 +197,19 @@ static int write_file(const char *path, const char *buf, size_t buflen)
ssize_t numwritten;
fd = open(path, O_WRONLY);
- if (fd == -1)
+ if (fd == -1) {
+ printf("open(%s)\n", path);
+ exit(EXIT_FAILURE);
return 0;
+ }
numwritten = write(fd, buf, buflen - 1);
close(fd);
- if (numwritten < 1)
+ if (numwritten < 1) {
+ printf("write(%s)\n", buf);
+ exit(EXIT_FAILURE);
return 0;
+ }
return (unsigned int) numwritten;
}
@@ -218,20 +276,11 @@ static void write_string(const char *name, const char *val)
}
}
-static const unsigned long read_num(const char *name)
+static const unsigned long _read_num(const char *path)
{
- char path[PATH_MAX];
char buf[21];
- int ret;
-
- ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
- if (ret >= PATH_MAX) {
- printf("%s: Pathname is too long\n", __func__);
- exit(EXIT_FAILURE);
- }
- ret = read_file(path, buf, sizeof(buf));
- if (ret < 0) {
+ if (read_file(path, buf, sizeof(buf)) < 0) {
perror("read_file(read_num)");
exit(EXIT_FAILURE);
}
@@ -239,10 +288,9 @@ static const unsigned long read_num(const char *name)
return strtoul(buf, NULL, 10);
}
-static void write_num(const char *name, unsigned long num)
+static const unsigned long read_num(const char *name)
{
char path[PATH_MAX];
- char buf[21];
int ret;
ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
@@ -250,6 +298,12 @@ static void write_num(const char *name, unsigned long num)
printf("%s: Pathname is too long\n", __func__);
exit(EXIT_FAILURE);
}
+ return _read_num(path);
+}
+
+static void _write_num(const char *path, unsigned long num)
+{
+ char buf[21];
sprintf(buf, "%ld", num);
if (!write_file(path, buf, strlen(buf) + 1)) {
@@ -258,6 +312,19 @@ static void write_num(const char *name, unsigned long num)
}
}
+static void write_num(const char *name, unsigned long num)
+{
+ char path[PATH_MAX];
+ int ret;
+
+ ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
+ if (ret >= PATH_MAX) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+ _write_num(path, num);
+}
+
static void write_settings(struct settings *settings)
{
struct khugepaged_settings *khugepaged = &settings->khugepaged;
@@ -277,6 +344,43 @@ static void write_settings(struct settings *settings)
write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap);
write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared);
write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan);
+
+ if (file_ops && finfo.type == VMA_FILE)
+ _write_num(finfo.dev_queue_read_ahead_path,
+ settings->read_ahead_kb);
+}
+
+#define MAX_SETTINGS_DEPTH 4
+static struct settings settings_stack[MAX_SETTINGS_DEPTH];
+static int settings_index;
+
+static struct settings *current_settings(void)
+{
+ if (!settings_index) {
+ printf("Fail: No settings set");
+ exit(EXIT_FAILURE);
+ }
+ return settings_stack + settings_index - 1;
+}
+
+static void push_settings(struct settings *settings)
+{
+ if (settings_index >= MAX_SETTINGS_DEPTH) {
+ printf("Fail: Settings stack exceeded");
+ exit(EXIT_FAILURE);
+ }
+ settings_stack[settings_index++] = *settings;
+ write_settings(current_settings());
+}
+
+static void pop_settings(void)
+{
+ if (settings_index <= 0) {
+ printf("Fail: Settings stack empty");
+ exit(EXIT_FAILURE);
+ }
+ --settings_index;
+ write_settings(current_settings());
}
static void restore_settings(int sig)
@@ -314,6 +418,10 @@ static void save_settings(void)
.max_ptes_shared = read_num("khugepaged/max_ptes_shared"),
.pages_to_scan = read_num("khugepaged/pages_to_scan"),
};
+ if (file_ops && finfo.type == VMA_FILE)
+ saved_settings.read_ahead_kb =
+ _read_num(finfo.dev_queue_read_ahead_path);
+
success("OK");
signal(SIGTERM, restore_settings);
@@ -322,72 +430,90 @@ static void save_settings(void)
signal(SIGQUIT, restore_settings);
}
-static void adjust_settings(void)
+static void get_finfo(const char *dir)
{
+ struct stat path_stat;
+ struct statfs fs;
+ char buf[1 << 10];
+ char path[PATH_MAX];
+ char *str, *end;
- printf("Adjust settings...");
- write_settings(&default_settings);
- success("OK");
-}
-
-#define MAX_LINE_LENGTH 500
-
-static bool check_for_pattern(FILE *fp, char *pattern, char *buf)
-{
- while (fgets(buf, MAX_LINE_LENGTH, fp) != NULL) {
- if (!strncmp(buf, pattern, strlen(pattern)))
- return true;
+ finfo.dir = dir;
+ stat(finfo.dir, &path_stat);
+ if (!S_ISDIR(path_stat.st_mode)) {
+ printf("%s: Not a directory (%s)\n", __func__, finfo.dir);
+ exit(EXIT_FAILURE);
}
- return false;
-}
-
-static bool check_huge(void *addr)
-{
- bool thp = false;
- int ret;
- FILE *fp;
- char buffer[MAX_LINE_LENGTH];
- char addr_pattern[MAX_LINE_LENGTH];
-
- ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
- (unsigned long) addr);
- if (ret >= MAX_LINE_LENGTH) {
- printf("%s: Pattern is too long\n", __func__);
+ if (snprintf(finfo.path, sizeof(finfo.path), "%s/" TEST_FILE,
+ finfo.dir) >= sizeof(finfo.path)) {
+ printf("%s: Pathname is too long\n", __func__);
exit(EXIT_FAILURE);
}
-
-
- fp = fopen(PID_SMAPS, "r");
- if (!fp) {
- printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
+ if (statfs(finfo.dir, &fs)) {
+ perror("statfs()");
exit(EXIT_FAILURE);
}
- if (!check_for_pattern(fp, addr_pattern, buffer))
- goto err_out;
-
- ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "AnonHugePages:%10ld kB",
- hpage_pmd_size >> 10);
- if (ret >= MAX_LINE_LENGTH) {
- printf("%s: Pattern is too long\n", __func__);
+ finfo.type = fs.f_type == TMPFS_MAGIC ? VMA_SHMEM : VMA_FILE;
+ if (finfo.type == VMA_SHMEM)
+ return;
+
+ /* Find owning device's queue/read_ahead_kb control */
+ if (snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/uevent",
+ major(path_stat.st_dev), minor(path_stat.st_dev))
+ >= sizeof(path)) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+ if (read_file(path, buf, sizeof(buf)) < 0) {
+ perror("read_file(read_num)");
+ exit(EXIT_FAILURE);
+ }
+ if (strstr(buf, "DEVTYPE=disk")) {
+ /* Found it */
+ if (snprintf(finfo.dev_queue_read_ahead_path,
+ sizeof(finfo.dev_queue_read_ahead_path),
+ "/sys/dev/block/%d:%d/queue/read_ahead_kb",
+ major(path_stat.st_dev), minor(path_stat.st_dev))
+ >= sizeof(finfo.dev_queue_read_ahead_path)) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+ return;
+ }
+ if (!strstr(buf, "DEVTYPE=partition")) {
+ printf("%s: Unknown device type: %s\n", __func__, path);
exit(EXIT_FAILURE);
}
/*
- * Fetch the AnonHugePages: in the same block and check whether it got
- * the expected number of hugeepages next.
+ * Partition of block device - need to find actual device.
+ * Using naming convention that devnameN is partition of
+ * device devname.
*/
- if (!check_for_pattern(fp, "AnonHugePages:", buffer))
- goto err_out;
-
- if (strncmp(buffer, addr_pattern, strlen(addr_pattern)))
- goto err_out;
-
- thp = true;
-err_out:
- fclose(fp);
- return thp;
+ str = strstr(buf, "DEVNAME=");
+ if (!str) {
+ printf("%s: Could not read: %s", __func__, path);
+ exit(EXIT_FAILURE);
+ }
+ str += 8;
+ end = str;
+ while (*end) {
+ if (isdigit(*end)) {
+ *end = '\0';
+ if (snprintf(finfo.dev_queue_read_ahead_path,
+ sizeof(finfo.dev_queue_read_ahead_path),
+ "/sys/block/%s/queue/read_ahead_kb",
+ str) >= sizeof(finfo.dev_queue_read_ahead_path)) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+ return;
+ }
+ ++end;
+ }
+ printf("%s: Could not read: %s\n", __func__, path);
+ exit(EXIT_FAILURE);
}
-
static bool check_swap(void *addr, unsigned long size)
{
bool swap = false;
@@ -409,7 +535,7 @@ static bool check_swap(void *addr, unsigned long size)
printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
exit(EXIT_FAILURE);
}
- if (!check_for_pattern(fp, addr_pattern, buffer))
+ if (!check_for_pattern(fp, addr_pattern, buffer, sizeof(buffer)))
goto err_out;
ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "Swap:%19ld kB",
@@ -422,7 +548,7 @@ static bool check_swap(void *addr, unsigned long size)
* Fetch the Swap: in the same block and check whether it got
* the expected number of hugeepages next.
*/
- if (!check_for_pattern(fp, "Swap:", buffer))
+ if (!check_for_pattern(fp, "Swap:", buffer, sizeof(buffer)))
goto err_out;
if (strncmp(buffer, addr_pattern, strlen(addr_pattern)))
@@ -434,12 +560,12 @@ err_out:
return swap;
}
-static void *alloc_mapping(void)
+static void *alloc_mapping(int nr)
{
void *p;
- p = mmap(BASE_ADDR, hpage_pmd_size, PROT_READ | PROT_WRITE,
- MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ p = mmap(BASE_ADDR, nr * hpage_pmd_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
if (p != BASE_ADDR) {
printf("Failed to allocate VMA at %p\n", BASE_ADDR);
exit(EXIT_FAILURE);
@@ -456,6 +582,60 @@ static void fill_memory(int *p, unsigned long start, unsigned long end)
p[i * page_size / sizeof(*p)] = i + 0xdead0000;
}
+/*
+ * MADV_COLLAPSE is a best-effort request and may fail if an internal
+ * resource is temporarily unavailable, in which case it will set errno to
+ * EAGAIN. In such a case, immediately reattempt the operation one more
+ * time.
+ */
+static int madvise_collapse_retry(void *p, unsigned long size)
+{
+ bool retry = true;
+ int ret;
+
+retry:
+ ret = madvise(p, size, MADV_COLLAPSE);
+ if (ret && errno == EAGAIN && retry) {
+ retry = false;
+ goto retry;
+ }
+ return ret;
+}
+
+/*
+ * Returns pmd-mapped hugepage in VMA marked VM_HUGEPAGE, filled with
+ * validate_memory()'able contents.
+ */
+static void *alloc_hpage(struct mem_ops *ops)
+{
+ void *p = ops->setup_area(1);
+
+ ops->fault(p, 0, hpage_pmd_size);
+
+ /*
+ * VMA should be neither VM_HUGEPAGE nor VM_NOHUGEPAGE.
+ * The latter is ineligible for collapse by MADV_COLLAPSE
+ * while the former might cause MADV_COLLAPSE to race with
+ * khugepaged on low-load system (like a test machine), which
+ * would cause MADV_COLLAPSE to fail with EAGAIN.
+ */
+ printf("Allocate huge page...");
+ if (madvise_collapse_retry(p, hpage_pmd_size)) {
+ perror("madvise(MADV_COLLAPSE)");
+ exit(EXIT_FAILURE);
+ }
+ if (!ops->check_huge(p, 1)) {
+ perror("madvise(MADV_COLLAPSE)");
+ exit(EXIT_FAILURE);
+ }
+ if (madvise(p, hpage_pmd_size, MADV_HUGEPAGE)) {
+ perror("madvise(MADV_HUGEPAGE)");
+ exit(EXIT_FAILURE);
+ }
+ success("OK");
+ return p;
+}
+
static void validate_memory(int *p, unsigned long start, unsigned long end)
{
int i;
@@ -469,26 +649,216 @@ static void validate_memory(int *p, unsigned long start, unsigned long end)
}
}
+static void *anon_setup_area(int nr_hpages)
+{
+ return alloc_mapping(nr_hpages);
+}
+
+static void anon_cleanup_area(void *p, unsigned long size)
+{
+ munmap(p, size);
+}
+
+static void anon_fault(void *p, unsigned long start, unsigned long end)
+{
+ fill_memory(p, start, end);
+}
+
+static bool anon_check_huge(void *addr, int nr_hpages)
+{
+ return check_huge_anon(addr, nr_hpages, hpage_pmd_size);
+}
+
+static void *file_setup_area(int nr_hpages)
+{
+ int fd;
+ void *p;
+ unsigned long size;
+
+ unlink(finfo.path); /* Cleanup from previous failed tests */
+ printf("Creating %s for collapse%s...", finfo.path,
+ finfo.type == VMA_SHMEM ? " (tmpfs)" : "");
+ fd = open(finfo.path, O_DSYNC | O_CREAT | O_RDWR | O_TRUNC | O_EXCL,
+ 777);
+ if (fd < 0) {
+ perror("open()");
+ exit(EXIT_FAILURE);
+ }
+
+ size = nr_hpages * hpage_pmd_size;
+ p = alloc_mapping(nr_hpages);
+ fill_memory(p, 0, size);
+ write(fd, p, size);
+ close(fd);
+ munmap(p, size);
+ success("OK");
+
+ printf("Opening %s read only for collapse...", finfo.path);
+ finfo.fd = open(finfo.path, O_RDONLY, 777);
+ if (finfo.fd < 0) {
+ perror("open()");
+ exit(EXIT_FAILURE);
+ }
+ p = mmap(BASE_ADDR, size, PROT_READ | PROT_EXEC,
+ MAP_PRIVATE, finfo.fd, 0);
+ if (p == MAP_FAILED || p != BASE_ADDR) {
+ perror("mmap()");
+ exit(EXIT_FAILURE);
+ }
+
+ /* Drop page cache */
+ write_file("/proc/sys/vm/drop_caches", "3", 2);
+ success("OK");
+ return p;
+}
+
+static void file_cleanup_area(void *p, unsigned long size)
+{
+ munmap(p, size);
+ close(finfo.fd);
+ unlink(finfo.path);
+}
+
+static void file_fault(void *p, unsigned long start, unsigned long end)
+{
+ if (madvise(((char *)p) + start, end - start, MADV_POPULATE_READ)) {
+ perror("madvise(MADV_POPULATE_READ");
+ exit(EXIT_FAILURE);
+ }
+}
+
+static bool file_check_huge(void *addr, int nr_hpages)
+{
+ switch (finfo.type) {
+ case VMA_FILE:
+ return check_huge_file(addr, nr_hpages, hpage_pmd_size);
+ case VMA_SHMEM:
+ return check_huge_shmem(addr, nr_hpages, hpage_pmd_size);
+ default:
+ exit(EXIT_FAILURE);
+ return false;
+ }
+}
+
+static void *shmem_setup_area(int nr_hpages)
+{
+ void *p;
+ unsigned long size = nr_hpages * hpage_pmd_size;
+
+ finfo.fd = memfd_create("khugepaged-selftest-collapse-shmem", 0);
+ if (finfo.fd < 0) {
+ perror("memfd_create()");
+ exit(EXIT_FAILURE);
+ }
+ if (ftruncate(finfo.fd, size)) {
+ perror("ftruncate()");
+ exit(EXIT_FAILURE);
+ }
+ p = mmap(BASE_ADDR, size, PROT_READ | PROT_WRITE, MAP_SHARED, finfo.fd,
+ 0);
+ if (p != BASE_ADDR) {
+ perror("mmap()");
+ exit(EXIT_FAILURE);
+ }
+ return p;
+}
+
+static void shmem_cleanup_area(void *p, unsigned long size)
+{
+ munmap(p, size);
+ close(finfo.fd);
+}
+
+static bool shmem_check_huge(void *addr, int nr_hpages)
+{
+ return check_huge_shmem(addr, nr_hpages, hpage_pmd_size);
+}
+
+static struct mem_ops __anon_ops = {
+ .setup_area = &anon_setup_area,
+ .cleanup_area = &anon_cleanup_area,
+ .fault = &anon_fault,
+ .check_huge = &anon_check_huge,
+ .name = "anon",
+};
+
+static struct mem_ops __file_ops = {
+ .setup_area = &file_setup_area,
+ .cleanup_area = &file_cleanup_area,
+ .fault = &file_fault,
+ .check_huge = &file_check_huge,
+ .name = "file",
+};
+
+static struct mem_ops __shmem_ops = {
+ .setup_area = &shmem_setup_area,
+ .cleanup_area = &shmem_cleanup_area,
+ .fault = &anon_fault,
+ .check_huge = &shmem_check_huge,
+ .name = "shmem",
+};
+
+static void __madvise_collapse(const char *msg, char *p, int nr_hpages,
+ struct mem_ops *ops, bool expect)
+{
+ int ret;
+ struct settings settings = *current_settings();
+
+ printf("%s...", msg);
+
+ /*
+ * Prevent khugepaged interference and tests that MADV_COLLAPSE
+ * ignores /sys/kernel/mm/transparent_hugepage/enabled
+ */
+ settings.thp_enabled = THP_NEVER;
+ settings.shmem_enabled = SHMEM_NEVER;
+ push_settings(&settings);
+
+ /* Clear VM_NOHUGEPAGE */
+ madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE);
+ ret = madvise_collapse_retry(p, nr_hpages * hpage_pmd_size);
+ if (((bool)ret) == expect)
+ fail("Fail: Bad return value");
+ else if (!ops->check_huge(p, expect ? nr_hpages : 0))
+ fail("Fail: check_huge()");
+ else
+ success("OK");
+
+ pop_settings();
+}
+
+static void madvise_collapse(const char *msg, char *p, int nr_hpages,
+ struct mem_ops *ops, bool expect)
+{
+ /* Sanity check */
+ if (!ops->check_huge(p, 0)) {
+ printf("Unexpected huge page\n");
+ exit(EXIT_FAILURE);
+ }
+ __madvise_collapse(msg, p, nr_hpages, ops, expect);
+}
+
#define TICK 500000
-static bool wait_for_scan(const char *msg, char *p)
+static bool wait_for_scan(const char *msg, char *p, int nr_hpages,
+ struct mem_ops *ops)
{
int full_scans;
int timeout = 6; /* 3 seconds */
/* Sanity check */
- if (check_huge(p)) {
+ if (!ops->check_huge(p, 0)) {
printf("Unexpected huge page\n");
exit(EXIT_FAILURE);
}
- madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
+ madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE);
/* Wait until the second full_scan completed */
full_scans = read_num("khugepaged/full_scans") + 2;
printf("%s...", msg);
while (timeout--) {
- if (check_huge(p))
+ if (ops->check_huge(p, nr_hpages))
break;
if (read_num("khugepaged/full_scans") >= full_scans)
break;
@@ -496,122 +866,155 @@ static bool wait_for_scan(const char *msg, char *p)
usleep(TICK);
}
- madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
+ madvise(p, nr_hpages * hpage_pmd_size, MADV_NOHUGEPAGE);
return timeout == -1;
}
+static void khugepaged_collapse(const char *msg, char *p, int nr_hpages,
+ struct mem_ops *ops, bool expect)
+{
+ if (wait_for_scan(msg, p, nr_hpages, ops)) {
+ if (expect)
+ fail("Timeout");
+ else
+ success("OK");
+ return;
+ }
+
+ /*
+ * For file and shmem memory, khugepaged only retracts pte entries after
+ * putting the new hugepage in the page cache. The hugepage must be
+ * subsequently refaulted to install the pmd mapping for the mm.
+ */
+ if (ops != &__anon_ops)
+ ops->fault(p, 0, nr_hpages * hpage_pmd_size);
+
+ if (ops->check_huge(p, expect ? nr_hpages : 0))
+ success("OK");
+ else
+ fail("Fail");
+}
+
+static struct collapse_context __khugepaged_context = {
+ .collapse = &khugepaged_collapse,
+ .enforce_pte_scan_limits = true,
+ .name = "khugepaged",
+};
+
+static struct collapse_context __madvise_context = {
+ .collapse = &madvise_collapse,
+ .enforce_pte_scan_limits = false,
+ .name = "madvise",
+};
+
+static bool is_tmpfs(struct mem_ops *ops)
+{
+ return ops == &__file_ops && finfo.type == VMA_SHMEM;
+}
+
static void alloc_at_fault(void)
{
- struct settings settings = default_settings;
+ struct settings settings = *current_settings();
char *p;
settings.thp_enabled = THP_ALWAYS;
- write_settings(&settings);
+ push_settings(&settings);
- p = alloc_mapping();
+ p = alloc_mapping(1);
*p = 1;
printf("Allocate huge page on fault...");
- if (check_huge(p))
+ if (check_huge_anon(p, 1, hpage_pmd_size))
success("OK");
else
fail("Fail");
- write_settings(&default_settings);
+ pop_settings();
madvise(p, page_size, MADV_DONTNEED);
printf("Split huge PMD on MADV_DONTNEED...");
- if (!check_huge(p))
+ if (check_huge_anon(p, 0, hpage_pmd_size))
success("OK");
else
fail("Fail");
munmap(p, hpage_pmd_size);
}
-static void collapse_full(void)
+static void collapse_full(struct collapse_context *c, struct mem_ops *ops)
{
void *p;
-
- p = alloc_mapping();
- fill_memory(p, 0, hpage_pmd_size);
- if (wait_for_scan("Collapse fully populated PTE table", p))
- fail("Timeout");
- else if (check_huge(p))
- success("OK");
- else
- fail("Fail");
- validate_memory(p, 0, hpage_pmd_size);
- munmap(p, hpage_pmd_size);
+ int nr_hpages = 4;
+ unsigned long size = nr_hpages * hpage_pmd_size;
+
+ p = ops->setup_area(nr_hpages);
+ ops->fault(p, 0, size);
+ c->collapse("Collapse multiple fully populated PTE table", p, nr_hpages,
+ ops, true);
+ validate_memory(p, 0, size);
+ ops->cleanup_area(p, size);
}
-static void collapse_empty(void)
+static void collapse_empty(struct collapse_context *c, struct mem_ops *ops)
{
void *p;
- p = alloc_mapping();
- if (wait_for_scan("Do not collapse empty PTE table", p))
- fail("Timeout");
- else if (check_huge(p))
- fail("Fail");
- else
- success("OK");
- munmap(p, hpage_pmd_size);
+ p = ops->setup_area(1);
+ c->collapse("Do not collapse empty PTE table", p, 1, ops, false);
+ ops->cleanup_area(p, hpage_pmd_size);
}
-static void collapse_single_pte_entry(void)
+static void collapse_single_pte_entry(struct collapse_context *c, struct mem_ops *ops)
{
void *p;
- p = alloc_mapping();
- fill_memory(p, 0, page_size);
- if (wait_for_scan("Collapse PTE table with single PTE entry present", p))
- fail("Timeout");
- else if (check_huge(p))
- success("OK");
- else
- fail("Fail");
- validate_memory(p, 0, page_size);
- munmap(p, hpage_pmd_size);
+ p = ops->setup_area(1);
+ ops->fault(p, 0, page_size);
+ c->collapse("Collapse PTE table with single PTE entry present", p,
+ 1, ops, true);
+ ops->cleanup_area(p, hpage_pmd_size);
}
-static void collapse_max_ptes_none(void)
+static void collapse_max_ptes_none(struct collapse_context *c, struct mem_ops *ops)
{
int max_ptes_none = hpage_pmd_nr / 2;
- struct settings settings = default_settings;
+ struct settings settings = *current_settings();
void *p;
settings.khugepaged.max_ptes_none = max_ptes_none;
- write_settings(&settings);
+ push_settings(&settings);
- p = alloc_mapping();
+ p = ops->setup_area(1);
- fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
- if (wait_for_scan("Do not collapse with max_ptes_none exceeded", p))
- fail("Timeout");
- else if (check_huge(p))
- fail("Fail");
- else
- success("OK");
- validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
+ if (is_tmpfs(ops)) {
+ /* shmem pages always in the page cache */
+ printf("tmpfs...");
+ skip("Skip");
+ goto skip;
+ }
- fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
- if (wait_for_scan("Collapse with max_ptes_none PTEs empty", p))
- fail("Timeout");
- else if (check_huge(p))
- success("OK");
- else
- fail("Fail");
- validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
+ ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
+ c->collapse("Maybe collapse with max_ptes_none exceeded", p, 1,
+ ops, !c->enforce_pte_scan_limits);
+ validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
- munmap(p, hpage_pmd_size);
- write_settings(&default_settings);
+ if (c->enforce_pte_scan_limits) {
+ ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
+ c->collapse("Collapse with max_ptes_none PTEs empty", p, 1, ops,
+ true);
+ validate_memory(p, 0,
+ (hpage_pmd_nr - max_ptes_none) * page_size);
+ }
+skip:
+ ops->cleanup_area(p, hpage_pmd_size);
+ pop_settings();
}
-static void collapse_swapin_single_pte(void)
+static void collapse_swapin_single_pte(struct collapse_context *c, struct mem_ops *ops)
{
void *p;
- p = alloc_mapping();
- fill_memory(p, 0, hpage_pmd_size);
+
+ p = ops->setup_area(1);
+ ops->fault(p, 0, hpage_pmd_size);
printf("Swapout one page...");
if (madvise(p, page_size, MADV_PAGEOUT)) {
@@ -625,25 +1028,21 @@ static void collapse_swapin_single_pte(void)
goto out;
}
- if (wait_for_scan("Collapse with swapping in single PTE entry", p))
- fail("Timeout");
- else if (check_huge(p))
- success("OK");
- else
- fail("Fail");
+ c->collapse("Collapse with swapping in single PTE entry", p, 1, ops,
+ true);
validate_memory(p, 0, hpage_pmd_size);
out:
- munmap(p, hpage_pmd_size);
+ ops->cleanup_area(p, hpage_pmd_size);
}
-static void collapse_max_ptes_swap(void)
+static void collapse_max_ptes_swap(struct collapse_context *c, struct mem_ops *ops)
{
int max_ptes_swap = read_num("khugepaged/max_ptes_swap");
void *p;
- p = alloc_mapping();
+ p = ops->setup_area(1);
+ ops->fault(p, 0, hpage_pmd_size);
- fill_memory(p, 0, hpage_pmd_size);
printf("Swapout %d of %d pages...", max_ptes_swap + 1, hpage_pmd_nr);
if (madvise(p, (max_ptes_swap + 1) * page_size, MADV_PAGEOUT)) {
perror("madvise(MADV_PAGEOUT)");
@@ -656,115 +1055,93 @@ static void collapse_max_ptes_swap(void)
goto out;
}
- if (wait_for_scan("Do not collapse with max_ptes_swap exceeded", p))
- fail("Timeout");
- else if (check_huge(p))
- fail("Fail");
- else
- success("OK");
+ c->collapse("Maybe collapse with max_ptes_swap exceeded", p, 1, ops,
+ !c->enforce_pte_scan_limits);
validate_memory(p, 0, hpage_pmd_size);
- fill_memory(p, 0, hpage_pmd_size);
- printf("Swapout %d of %d pages...", max_ptes_swap, hpage_pmd_nr);
- if (madvise(p, max_ptes_swap * page_size, MADV_PAGEOUT)) {
- perror("madvise(MADV_PAGEOUT)");
- exit(EXIT_FAILURE);
- }
- if (check_swap(p, max_ptes_swap * page_size)) {
- success("OK");
- } else {
- fail("Fail");
- goto out;
- }
+ if (c->enforce_pte_scan_limits) {
+ ops->fault(p, 0, hpage_pmd_size);
+ printf("Swapout %d of %d pages...", max_ptes_swap,
+ hpage_pmd_nr);
+ if (madvise(p, max_ptes_swap * page_size, MADV_PAGEOUT)) {
+ perror("madvise(MADV_PAGEOUT)");
+ exit(EXIT_FAILURE);
+ }
+ if (check_swap(p, max_ptes_swap * page_size)) {
+ success("OK");
+ } else {
+ fail("Fail");
+ goto out;
+ }
- if (wait_for_scan("Collapse with max_ptes_swap pages swapped out", p))
- fail("Timeout");
- else if (check_huge(p))
- success("OK");
- else
- fail("Fail");
- validate_memory(p, 0, hpage_pmd_size);
+ c->collapse("Collapse with max_ptes_swap pages swapped out", p,
+ 1, ops, true);
+ validate_memory(p, 0, hpage_pmd_size);
+ }
out:
- munmap(p, hpage_pmd_size);
+ ops->cleanup_area(p, hpage_pmd_size);
}
-static void collapse_single_pte_entry_compound(void)
+static void collapse_single_pte_entry_compound(struct collapse_context *c, struct mem_ops *ops)
{
void *p;
- p = alloc_mapping();
+ p = alloc_hpage(ops);
- printf("Allocate huge page...");
- madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
- fill_memory(p, 0, hpage_pmd_size);
- if (check_huge(p))
- success("OK");
- else
- fail("Fail");
- madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
+ if (is_tmpfs(ops)) {
+ /* MADV_DONTNEED won't evict tmpfs pages */
+ printf("tmpfs...");
+ skip("Skip");
+ goto skip;
+ }
+ madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
printf("Split huge page leaving single PTE mapping compound page...");
madvise(p + page_size, hpage_pmd_size - page_size, MADV_DONTNEED);
- if (!check_huge(p))
+ if (ops->check_huge(p, 0))
success("OK");
else
fail("Fail");
- if (wait_for_scan("Collapse PTE table with single PTE mapping compound page", p))
- fail("Timeout");
- else if (check_huge(p))
- success("OK");
- else
- fail("Fail");
+ c->collapse("Collapse PTE table with single PTE mapping compound page",
+ p, 1, ops, true);
validate_memory(p, 0, page_size);
- munmap(p, hpage_pmd_size);
+skip:
+ ops->cleanup_area(p, hpage_pmd_size);
}
-static void collapse_full_of_compound(void)
+static void collapse_full_of_compound(struct collapse_context *c, struct mem_ops *ops)
{
void *p;
- p = alloc_mapping();
-
- printf("Allocate huge page...");
- madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
- fill_memory(p, 0, hpage_pmd_size);
- if (check_huge(p))
- success("OK");
- else
- fail("Fail");
-
+ p = alloc_hpage(ops);
printf("Split huge page leaving single PTE page table full of compound pages...");
madvise(p, page_size, MADV_NOHUGEPAGE);
madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
- if (!check_huge(p))
+ if (ops->check_huge(p, 0))
success("OK");
else
fail("Fail");
- if (wait_for_scan("Collapse PTE table full of compound pages", p))
- fail("Timeout");
- else if (check_huge(p))
- success("OK");
- else
- fail("Fail");
+ c->collapse("Collapse PTE table full of compound pages", p, 1, ops,
+ true);
validate_memory(p, 0, hpage_pmd_size);
- munmap(p, hpage_pmd_size);
+ ops->cleanup_area(p, hpage_pmd_size);
}
-static void collapse_compound_extreme(void)
+static void collapse_compound_extreme(struct collapse_context *c, struct mem_ops *ops)
{
void *p;
int i;
- p = alloc_mapping();
+ p = ops->setup_area(1);
for (i = 0; i < hpage_pmd_nr; i++) {
printf("\rConstruct PTE page table full of different PTE-mapped compound pages %3d/%d...",
i + 1, hpage_pmd_nr);
madvise(BASE_ADDR, hpage_pmd_size, MADV_HUGEPAGE);
- fill_memory(BASE_ADDR, 0, hpage_pmd_size);
- if (!check_huge(BASE_ADDR)) {
+ ops->fault(BASE_ADDR, 0, hpage_pmd_size);
+ if (!ops->check_huge(BASE_ADDR, 1)) {
printf("Failed to allocate huge page\n");
exit(EXIT_FAILURE);
}
@@ -791,34 +1168,30 @@ static void collapse_compound_extreme(void)
}
}
- munmap(BASE_ADDR, hpage_pmd_size);
- fill_memory(p, 0, hpage_pmd_size);
- if (!check_huge(p))
+ ops->cleanup_area(BASE_ADDR, hpage_pmd_size);
+ ops->fault(p, 0, hpage_pmd_size);
+ if (!ops->check_huge(p, 1))
success("OK");
else
fail("Fail");
- if (wait_for_scan("Collapse PTE table full of different compound pages", p))
- fail("Timeout");
- else if (check_huge(p))
- success("OK");
- else
- fail("Fail");
+ c->collapse("Collapse PTE table full of different compound pages", p, 1,
+ ops, true);
validate_memory(p, 0, hpage_pmd_size);
- munmap(p, hpage_pmd_size);
+ ops->cleanup_area(p, hpage_pmd_size);
}
-static void collapse_fork(void)
+static void collapse_fork(struct collapse_context *c, struct mem_ops *ops)
{
int wstatus;
void *p;
- p = alloc_mapping();
+ p = ops->setup_area(1);
printf("Allocate small page...");
- fill_memory(p, 0, page_size);
- if (!check_huge(p))
+ ops->fault(p, 0, page_size);
+ if (ops->check_huge(p, 0))
success("OK");
else
fail("Fail");
@@ -829,22 +1202,17 @@ static void collapse_fork(void)
skip_settings_restore = true;
exit_status = 0;
- if (!check_huge(p))
+ if (ops->check_huge(p, 0))
success("OK");
else
fail("Fail");
- fill_memory(p, page_size, 2 * page_size);
-
- if (wait_for_scan("Collapse PTE table with single page shared with parent process", p))
- fail("Timeout");
- else if (check_huge(p))
- success("OK");
- else
- fail("Fail");
+ ops->fault(p, page_size, 2 * page_size);
+ c->collapse("Collapse PTE table with single page shared with parent process",
+ p, 1, ops, true);
validate_memory(p, 0, page_size);
- munmap(p, hpage_pmd_size);
+ ops->cleanup_area(p, hpage_pmd_size);
exit(exit_status);
}
@@ -852,36 +1220,27 @@ static void collapse_fork(void)
exit_status += WEXITSTATUS(wstatus);
printf("Check if parent still has small page...");
- if (!check_huge(p))
+ if (ops->check_huge(p, 0))
success("OK");
else
fail("Fail");
validate_memory(p, 0, page_size);
- munmap(p, hpage_pmd_size);
+ ops->cleanup_area(p, hpage_pmd_size);
}
-static void collapse_fork_compound(void)
+static void collapse_fork_compound(struct collapse_context *c, struct mem_ops *ops)
{
int wstatus;
void *p;
- p = alloc_mapping();
-
- printf("Allocate huge page...");
- madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
- fill_memory(p, 0, hpage_pmd_size);
- if (check_huge(p))
- success("OK");
- else
- fail("Fail");
-
+ p = alloc_hpage(ops);
printf("Share huge page over fork()...");
if (!fork()) {
/* Do not touch settings on child exit */
skip_settings_restore = true;
exit_status = 0;
- if (check_huge(p))
+ if (ops->check_huge(p, 1))
success("OK");
else
fail("Fail");
@@ -889,24 +1248,20 @@ static void collapse_fork_compound(void)
printf("Split huge page PMD in child process...");
madvise(p, page_size, MADV_NOHUGEPAGE);
madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
- if (!check_huge(p))
+ if (ops->check_huge(p, 0))
success("OK");
else
fail("Fail");
- fill_memory(p, 0, page_size);
+ ops->fault(p, 0, page_size);
write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1);
- if (wait_for_scan("Collapse PTE table full of compound pages in child", p))
- fail("Timeout");
- else if (check_huge(p))
- success("OK");
- else
- fail("Fail");
+ c->collapse("Collapse PTE table full of compound pages in child",
+ p, 1, ops, true);
write_num("khugepaged/max_ptes_shared",
- default_settings.khugepaged.max_ptes_shared);
+ current_settings()->khugepaged.max_ptes_shared);
validate_memory(p, 0, hpage_pmd_size);
- munmap(p, hpage_pmd_size);
+ ops->cleanup_area(p, hpage_pmd_size);
exit(exit_status);
}
@@ -914,74 +1269,59 @@ static void collapse_fork_compound(void)
exit_status += WEXITSTATUS(wstatus);
printf("Check if parent still has huge page...");
- if (check_huge(p))
+ if (ops->check_huge(p, 1))
success("OK");
else
fail("Fail");
validate_memory(p, 0, hpage_pmd_size);
- munmap(p, hpage_pmd_size);
+ ops->cleanup_area(p, hpage_pmd_size);
}
-static void collapse_max_ptes_shared()
+static void collapse_max_ptes_shared(struct collapse_context *c, struct mem_ops *ops)
{
int max_ptes_shared = read_num("khugepaged/max_ptes_shared");
int wstatus;
void *p;
- p = alloc_mapping();
-
- printf("Allocate huge page...");
- madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
- fill_memory(p, 0, hpage_pmd_size);
- if (check_huge(p))
- success("OK");
- else
- fail("Fail");
-
+ p = alloc_hpage(ops);
printf("Share huge page over fork()...");
if (!fork()) {
/* Do not touch settings on child exit */
skip_settings_restore = true;
exit_status = 0;
- if (check_huge(p))
+ if (ops->check_huge(p, 1))
success("OK");
else
fail("Fail");
printf("Trigger CoW on page %d of %d...",
hpage_pmd_nr - max_ptes_shared - 1, hpage_pmd_nr);
- fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size);
- if (!check_huge(p))
- success("OK");
- else
- fail("Fail");
-
- if (wait_for_scan("Do not collapse with max_ptes_shared exceeded", p))
- fail("Timeout");
- else if (!check_huge(p))
- success("OK");
- else
- fail("Fail");
-
- printf("Trigger CoW on page %d of %d...",
- hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr);
- fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared) * page_size);
- if (!check_huge(p))
+ ops->fault(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size);
+ if (ops->check_huge(p, 0))
success("OK");
else
fail("Fail");
-
- if (wait_for_scan("Collapse with max_ptes_shared PTEs shared", p))
- fail("Timeout");
- else if (check_huge(p))
- success("OK");
- else
- fail("Fail");
+ c->collapse("Maybe collapse with max_ptes_shared exceeded", p,
+ 1, ops, !c->enforce_pte_scan_limits);
+
+ if (c->enforce_pte_scan_limits) {
+ printf("Trigger CoW on page %d of %d...",
+ hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr);
+ ops->fault(p, 0, (hpage_pmd_nr - max_ptes_shared) *
+ page_size);
+ if (ops->check_huge(p, 0))
+ success("OK");
+ else
+ fail("Fail");
+
+ c->collapse("Collapse with max_ptes_shared PTEs shared",
+ p, 1, ops, true);
+ }
validate_memory(p, 0, hpage_pmd_size);
- munmap(p, hpage_pmd_size);
+ ops->cleanup_area(p, hpage_pmd_size);
exit(exit_status);
}
@@ -989,20 +1329,153 @@ static void collapse_max_ptes_shared()
exit_status += WEXITSTATUS(wstatus);
printf("Check if parent still has huge page...");
- if (check_huge(p))
+ if (ops->check_huge(p, 1))
success("OK");
else
fail("Fail");
validate_memory(p, 0, hpage_pmd_size);
- munmap(p, hpage_pmd_size);
+ ops->cleanup_area(p, hpage_pmd_size);
}
-int main(void)
+static void madvise_collapse_existing_thps(struct collapse_context *c,
+ struct mem_ops *ops)
{
+ void *p;
+
+ p = ops->setup_area(1);
+ ops->fault(p, 0, hpage_pmd_size);
+ c->collapse("Collapse fully populated PTE table...", p, 1, ops, true);
+ validate_memory(p, 0, hpage_pmd_size);
+
+ /* c->collapse() will find a hugepage and complain - call directly. */
+ __madvise_collapse("Re-collapse PMD-mapped hugepage", p, 1, ops, true);
+ validate_memory(p, 0, hpage_pmd_size);
+ ops->cleanup_area(p, hpage_pmd_size);
+}
+
+/*
+ * Test race with khugepaged where page tables have been retracted and
+ * pmd cleared.
+ */
+static void madvise_retracted_page_tables(struct collapse_context *c,
+ struct mem_ops *ops)
+{
+ void *p;
+ int nr_hpages = 1;
+ unsigned long size = nr_hpages * hpage_pmd_size;
+
+ p = ops->setup_area(nr_hpages);
+ ops->fault(p, 0, size);
+
+ /* Let khugepaged collapse and leave pmd cleared */
+ if (wait_for_scan("Collapse and leave PMD cleared", p, nr_hpages,
+ ops)) {
+ fail("Timeout");
+ return;
+ }
+ success("OK");
+ c->collapse("Install huge PMD from page cache", p, nr_hpages, ops,
+ true);
+ validate_memory(p, 0, size);
+ ops->cleanup_area(p, size);
+}
+
+static void usage(void)
+{
+ fprintf(stderr, "\nUsage: ./khugepaged <test type> [dir]\n\n");
+ fprintf(stderr, "\t<test type>\t: <context>:<mem_type>\n");
+ fprintf(stderr, "\t<context>\t: [all|khugepaged|madvise]\n");
+ fprintf(stderr, "\t<mem_type>\t: [all|anon|file|shmem]\n");
+ fprintf(stderr, "\n\t\"file,all\" mem_type requires [dir] argument\n");
+ fprintf(stderr, "\n\t\"file,all\" mem_type requires kernel built with\n");
+ fprintf(stderr, "\tCONFIG_READ_ONLY_THP_FOR_FS=y\n");
+ fprintf(stderr, "\n\tif [dir] is a (sub)directory of a tmpfs mount, tmpfs must be\n");
+ fprintf(stderr, "\tmounted with huge=madvise option for khugepaged tests to work\n");
+ exit(1);
+}
+
+static void parse_test_type(int argc, const char **argv)
+{
+ char *buf;
+ const char *token;
+
+ if (argc == 1) {
+ /* Backwards compatibility */
+ khugepaged_context = &__khugepaged_context;
+ madvise_context = &__madvise_context;
+ anon_ops = &__anon_ops;
+ return;
+ }
+
+ buf = strdup(argv[1]);
+ token = strsep(&buf, ":");
+
+ if (!strcmp(token, "all")) {
+ khugepaged_context = &__khugepaged_context;
+ madvise_context = &__madvise_context;
+ } else if (!strcmp(token, "khugepaged")) {
+ khugepaged_context = &__khugepaged_context;
+ } else if (!strcmp(token, "madvise")) {
+ madvise_context = &__madvise_context;
+ } else {
+ usage();
+ }
+
+ if (!buf)
+ usage();
+
+ if (!strcmp(buf, "all")) {
+ file_ops = &__file_ops;
+ anon_ops = &__anon_ops;
+ shmem_ops = &__shmem_ops;
+ } else if (!strcmp(buf, "anon")) {
+ anon_ops = &__anon_ops;
+ } else if (!strcmp(buf, "file")) {
+ file_ops = &__file_ops;
+ } else if (!strcmp(buf, "shmem")) {
+ shmem_ops = &__shmem_ops;
+ } else {
+ usage();
+ }
+
+ if (!file_ops)
+ return;
+
+ if (argc != 3)
+ usage();
+}
+
+int main(int argc, const char **argv)
+{
+ struct settings default_settings = {
+ .thp_enabled = THP_MADVISE,
+ .thp_defrag = THP_DEFRAG_ALWAYS,
+ .shmem_enabled = SHMEM_ADVISE,
+ .use_zero_page = 0,
+ .khugepaged = {
+ .defrag = 1,
+ .alloc_sleep_millisecs = 10,
+ .scan_sleep_millisecs = 10,
+ },
+ /*
+ * When testing file-backed memory, the collapse path
+ * looks at how many pages are found in the page cache, not
+ * what pages are mapped. Disable read ahead optimization so
+ * pages don't find their way into the page cache unless
+ * we mem_ops->fault() them in.
+ */
+ .read_ahead_kb = 0,
+ };
+
+ parse_test_type(argc, argv);
+
+ if (file_ops)
+ get_finfo(argv[2]);
+
setbuf(stdout, NULL);
page_size = getpagesize();
- hpage_pmd_size = read_num("hpage_pmd_size");
+ hpage_pmd_size = read_pmd_pagesize();
hpage_pmd_nr = hpage_pmd_size / page_size;
default_settings.khugepaged.max_ptes_none = hpage_pmd_nr - 1;
@@ -1011,21 +1484,75 @@ int main(void)
default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8;
save_settings();
- adjust_settings();
+ push_settings(&default_settings);
alloc_at_fault();
- collapse_full();
- collapse_empty();
- collapse_single_pte_entry();
- collapse_max_ptes_none();
- collapse_swapin_single_pte();
- collapse_max_ptes_swap();
- collapse_single_pte_entry_compound();
- collapse_full_of_compound();
- collapse_compound_extreme();
- collapse_fork();
- collapse_fork_compound();
- collapse_max_ptes_shared();
+
+#define TEST(t, c, o) do { \
+ if (c && o) { \
+ printf("\nRun test: " #t " (%s:%s)\n", c->name, o->name); \
+ t(c, o); \
+ } \
+ } while (0)
+
+ TEST(collapse_full, khugepaged_context, anon_ops);
+ TEST(collapse_full, khugepaged_context, file_ops);
+ TEST(collapse_full, khugepaged_context, shmem_ops);
+ TEST(collapse_full, madvise_context, anon_ops);
+ TEST(collapse_full, madvise_context, file_ops);
+ TEST(collapse_full, madvise_context, shmem_ops);
+
+ TEST(collapse_empty, khugepaged_context, anon_ops);
+ TEST(collapse_empty, madvise_context, anon_ops);
+
+ TEST(collapse_single_pte_entry, khugepaged_context, anon_ops);
+ TEST(collapse_single_pte_entry, khugepaged_context, file_ops);
+ TEST(collapse_single_pte_entry, khugepaged_context, shmem_ops);
+ TEST(collapse_single_pte_entry, madvise_context, anon_ops);
+ TEST(collapse_single_pte_entry, madvise_context, file_ops);
+ TEST(collapse_single_pte_entry, madvise_context, shmem_ops);
+
+ TEST(collapse_max_ptes_none, khugepaged_context, anon_ops);
+ TEST(collapse_max_ptes_none, khugepaged_context, file_ops);
+ TEST(collapse_max_ptes_none, madvise_context, anon_ops);
+ TEST(collapse_max_ptes_none, madvise_context, file_ops);
+
+ TEST(collapse_single_pte_entry_compound, khugepaged_context, anon_ops);
+ TEST(collapse_single_pte_entry_compound, khugepaged_context, file_ops);
+ TEST(collapse_single_pte_entry_compound, madvise_context, anon_ops);
+ TEST(collapse_single_pte_entry_compound, madvise_context, file_ops);
+
+ TEST(collapse_full_of_compound, khugepaged_context, anon_ops);
+ TEST(collapse_full_of_compound, khugepaged_context, file_ops);
+ TEST(collapse_full_of_compound, khugepaged_context, shmem_ops);
+ TEST(collapse_full_of_compound, madvise_context, anon_ops);
+ TEST(collapse_full_of_compound, madvise_context, file_ops);
+ TEST(collapse_full_of_compound, madvise_context, shmem_ops);
+
+ TEST(collapse_compound_extreme, khugepaged_context, anon_ops);
+ TEST(collapse_compound_extreme, madvise_context, anon_ops);
+
+ TEST(collapse_swapin_single_pte, khugepaged_context, anon_ops);
+ TEST(collapse_swapin_single_pte, madvise_context, anon_ops);
+
+ TEST(collapse_max_ptes_swap, khugepaged_context, anon_ops);
+ TEST(collapse_max_ptes_swap, madvise_context, anon_ops);
+
+ TEST(collapse_fork, khugepaged_context, anon_ops);
+ TEST(collapse_fork, madvise_context, anon_ops);
+
+ TEST(collapse_fork_compound, khugepaged_context, anon_ops);
+ TEST(collapse_fork_compound, madvise_context, anon_ops);
+
+ TEST(collapse_max_ptes_shared, khugepaged_context, anon_ops);
+ TEST(collapse_max_ptes_shared, madvise_context, anon_ops);
+
+ TEST(madvise_collapse_existing_thps, madvise_context, anon_ops);
+ TEST(madvise_collapse_existing_thps, madvise_context, file_ops);
+ TEST(madvise_collapse_existing_thps, madvise_context, shmem_ops);
+
+ TEST(madvise_retracted_page_tables, madvise_context, file_ops);
+ TEST(madvise_retracted_page_tables, madvise_context, shmem_ops);
restore_settings(0);
}
diff --git a/tools/testing/selftests/vm/mremap_test.c b/tools/testing/selftests/vm/mremap_test.c
index db0270127aeb..9496346973d4 100644
--- a/tools/testing/selftests/vm/mremap_test.c
+++ b/tools/testing/selftests/vm/mremap_test.c
@@ -119,6 +119,50 @@ static unsigned long long get_mmap_min_addr(void)
}
/*
+ * This test validates that merge is called when expanding a mapping.
+ * Mapping containing three pages is created, middle page is unmapped
+ * and then the mapping containing the first page is expanded so that
+ * it fills the created hole. The two parts should merge creating
+ * single mapping with three pages.
+ */
+static void mremap_expand_merge(unsigned long page_size)
+{
+ char *test_name = "mremap expand merge";
+ FILE *fp;
+ char *line = NULL;
+ size_t len = 0;
+ bool success = false;
+ char *start = mmap(NULL, 3 * page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ munmap(start + page_size, page_size);
+ mremap(start, page_size, 2 * page_size, 0);
+
+ fp = fopen("/proc/self/maps", "r");
+ if (fp == NULL) {
+ ksft_test_result_fail("%s\n", test_name);
+ return;
+ }
+
+ while (getline(&line, &len, fp) != -1) {
+ char *first = strtok(line, "- ");
+ void *first_val = (void *)strtol(first, NULL, 16);
+ char *second = strtok(NULL, "- ");
+ void *second_val = (void *) strtol(second, NULL, 16);
+
+ if (first_val == start && second_val == start + 3 * page_size) {
+ success = true;
+ break;
+ }
+ }
+ if (success)
+ ksft_test_result_pass("%s\n", test_name);
+ else
+ ksft_test_result_fail("%s\n", test_name);
+ fclose(fp);
+}
+
+/*
* Returns the start address of the mapping on success, else returns
* NULL on failure.
*/
@@ -336,6 +380,7 @@ int main(int argc, char **argv)
int i, run_perf_tests;
unsigned int threshold_mb = VALIDATION_DEFAULT_THRESHOLD;
unsigned int pattern_seed;
+ int num_expand_tests = 1;
struct test test_cases[MAX_TEST];
struct test perf_test_cases[MAX_PERF_TEST];
int page_size;
@@ -407,12 +452,14 @@ int main(int argc, char **argv)
(threshold_mb * _1MB >= _1GB);
ksft_set_plan(ARRAY_SIZE(test_cases) + (run_perf_tests ?
- ARRAY_SIZE(perf_test_cases) : 0));
+ ARRAY_SIZE(perf_test_cases) : 0) + num_expand_tests);
for (i = 0; i < ARRAY_SIZE(test_cases); i++)
run_mremap_test_case(test_cases[i], &failures, threshold_mb,
pattern_seed);
+ mremap_expand_merge(page_size);
+
if (run_perf_tests) {
ksft_print_msg("\n%s\n",
"mremap HAVE_MOVE_PMD/PUD optimization time comparison for 1GB region:");
diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh
index de86983b8a0f..e780e76c26b8 100755
--- a/tools/testing/selftests/vm/run_vmtests.sh
+++ b/tools/testing/selftests/vm/run_vmtests.sh
@@ -120,11 +120,16 @@ run_test ./gup_test -a
# Dump pages 0, 19, and 4096, using pin_user_pages:
run_test ./gup_test -ct -F 0x1 0 19 0x1000
-run_test ./userfaultfd anon 20 16
-# Test requires source and destination huge pages. Size of source
-# (half_ufd_size_MB) is passed as argument to test.
-run_test ./userfaultfd hugetlb "$half_ufd_size_MB" 32
-run_test ./userfaultfd shmem 20 16
+uffd_mods=("" ":dev")
+for mod in "${uffd_mods[@]}"; do
+ run_test ./userfaultfd anon${mod} 20 16
+ # Hugetlb tests require source and destination huge pages. Pass in half
+ # the size ($half_ufd_size_MB), which is used for *each*.
+ run_test ./userfaultfd hugetlb${mod} "$half_ufd_size_MB" 32
+ run_test ./userfaultfd hugetlb_shared${mod} "$half_ufd_size_MB" 32 "$mnt"/uffd-test
+ rm -f "$mnt"/uffd-test
+ run_test ./userfaultfd shmem${mod} 20 16
+done
#cleanup
umount "$mnt"
diff --git a/tools/testing/selftests/vm/soft-dirty.c b/tools/testing/selftests/vm/soft-dirty.c
index e3a43f5d4fa2..21d8830c5f24 100644
--- a/tools/testing/selftests/vm/soft-dirty.c
+++ b/tools/testing/selftests/vm/soft-dirty.c
@@ -91,7 +91,7 @@ static void test_hugepage(int pagemap_fd, int pagesize)
for (i = 0; i < hpage_len; i++)
map[i] = (char)i;
- if (check_huge(map)) {
+ if (check_huge_anon(map, 1, hpage_len)) {
ksft_test_result_pass("Test %s huge page allocation\n", __func__);
clear_softdirty();
diff --git a/tools/testing/selftests/vm/split_huge_page_test.c b/tools/testing/selftests/vm/split_huge_page_test.c
index 6aa2b8253aed..76e1c36dd9e5 100644
--- a/tools/testing/selftests/vm/split_huge_page_test.c
+++ b/tools/testing/selftests/vm/split_huge_page_test.c
@@ -92,7 +92,6 @@ void split_pmd_thp(void)
{
char *one_page;
size_t len = 4 * pmd_pagesize;
- uint64_t thp_size;
size_t i;
one_page = memalign(pmd_pagesize, len);
@@ -107,8 +106,7 @@ void split_pmd_thp(void)
for (i = 0; i < len; i++)
one_page[i] = (char)i;
- thp_size = check_huge(one_page);
- if (!thp_size) {
+ if (!check_huge_anon(one_page, 1, pmd_pagesize)) {
printf("No THP is allocated\n");
exit(EXIT_FAILURE);
}
@@ -124,9 +122,8 @@ void split_pmd_thp(void)
}
- thp_size = check_huge(one_page);
- if (thp_size) {
- printf("Still %ld kB AnonHugePages not split\n", thp_size);
+ if (check_huge_anon(one_page, 0, pmd_pagesize)) {
+ printf("Still AnonHugePages not split\n");
exit(EXIT_FAILURE);
}
@@ -172,8 +169,7 @@ void split_pte_mapped_thp(void)
for (i = 0; i < len; i++)
one_page[i] = (char)i;
- thp_size = check_huge(one_page);
- if (!thp_size) {
+ if (!check_huge_anon(one_page, 1, pmd_pagesize)) {
printf("No THP is allocated\n");
exit(EXIT_FAILURE);
}
diff --git a/tools/testing/selftests/vm/test_hmm.sh b/tools/testing/selftests/vm/test_hmm.sh
index 539c9371e592..46e19b5d648d 100755
--- a/tools/testing/selftests/vm/test_hmm.sh
+++ b/tools/testing/selftests/vm/test_hmm.sh
@@ -52,21 +52,11 @@ load_driver()
usage
fi
fi
- if [ $? == 0 ]; then
- major=$(awk "\$2==\"HMM_DMIRROR\" {print \$1}" /proc/devices)
- mknod /dev/hmm_dmirror0 c $major 0
- mknod /dev/hmm_dmirror1 c $major 1
- if [ $# -eq 2 ]; then
- mknod /dev/hmm_dmirror2 c $major 2
- mknod /dev/hmm_dmirror3 c $major 3
- fi
- fi
}
unload_driver()
{
modprobe -r $DRIVER > /dev/null 2>&1
- rm -f /dev/hmm_dmirror?
}
run_smoke()
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 7c3f1b0ab468..74babdbc02e5 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -61,10 +61,11 @@
#include <sys/random.h>
#include "../kselftest.h"
+#include "vm_util.h"
#ifdef __NR_userfaultfd
-static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size;
+static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size, hpage_size;
#define BOUNCE_RANDOM (1<<0)
#define BOUNCE_RACINGFAULTS (1<<1)
@@ -77,6 +78,13 @@ static int bounces;
#define TEST_SHMEM 3
static int test_type;
+#define UFFD_FLAGS (O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY)
+
+#define BASE_PMD_ADDR ((void *)(1UL << 30))
+
+/* test using /dev/userfaultfd, instead of userfaultfd(2) */
+static bool test_dev_userfaultfd;
+
/* exercise the test_uffdio_*_eexist every ALARM_INTERVAL_SECS */
#define ALARM_INTERVAL_SECS 10
static volatile bool test_uffdio_copy_eexist = true;
@@ -92,9 +100,10 @@ static int huge_fd;
static unsigned long long *count_verify;
static int uffd = -1;
static int uffd_flags, finished, *pipefd;
-static char *area_src, *area_src_alias, *area_dst, *area_dst_alias;
+static char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap;
static char *zeropage;
pthread_attr_t attr;
+static bool test_collapse;
/* Userfaultfd test statistics */
struct uffd_stats {
@@ -122,9 +131,13 @@ struct uffd_stats {
#define swap(a, b) \
do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
+#define factor_of_2(x) ((x) ^ ((x) & ((x) - 1)))
+
const char *examples =
"# Run anonymous memory test on 100MiB region with 99999 bounces:\n"
"./userfaultfd anon 100 99999\n\n"
+ "# Run the same anonymous memory test, but using /dev/userfaultfd:\n"
+ "./userfaultfd anon:dev 100 99999\n\n"
"# Run share memory test on 1GiB region with 99 bounces:\n"
"./userfaultfd shmem 1000 99\n\n"
"# Run hugetlb memory test on 256MiB region with 50 bounces:\n"
@@ -141,6 +154,16 @@ static void usage(void)
"[hugetlbfs_file]\n\n");
fprintf(stderr, "Supported <test type>: anon, hugetlb, "
"hugetlb_shared, shmem\n\n");
+ fprintf(stderr, "'Test mods' can be joined to the test type string with a ':'. "
+ "Supported mods:\n");
+ fprintf(stderr, "\tsyscall - Use userfaultfd(2) (default)\n");
+ fprintf(stderr, "\tdev - Use /dev/userfaultfd instead of userfaultfd(2)\n");
+ fprintf(stderr, "\tcollapse - Test MADV_COLLAPSE of UFFDIO_REGISTER_MODE_MINOR\n"
+ "memory\n");
+ fprintf(stderr, "\nExample test mod usage:\n");
+ fprintf(stderr, "# Run anonymous memory test with /dev/userfaultfd:\n");
+ fprintf(stderr, "./userfaultfd anon:dev 100 99999\n\n");
+
fprintf(stderr, "Examples:\n\n");
fprintf(stderr, "%s", examples);
exit(1);
@@ -154,12 +177,14 @@ static void usage(void)
ret, __LINE__); \
} while (0)
-#define err(fmt, ...) \
+#define errexit(exitcode, fmt, ...) \
do { \
_err(fmt, ##__VA_ARGS__); \
- exit(1); \
+ exit(exitcode); \
} while (0)
+#define err(fmt, ...) errexit(1, fmt, ##__VA_ARGS__)
+
static void uffd_stats_reset(struct uffd_stats *uffd_stats,
unsigned long n_cpus)
{
@@ -212,12 +237,10 @@ static void anon_release_pages(char *rel_area)
err("madvise(MADV_DONTNEED) failed");
}
-static void anon_allocate_area(void **alloc_area)
+static void anon_allocate_area(void **alloc_area, bool is_src)
{
*alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
- if (*alloc_area == MAP_FAILED)
- err("mmap of anonymous memory failed");
}
static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset)
@@ -235,7 +258,7 @@ static void hugetlb_release_pages(char *rel_area)
}
}
-static void hugetlb_allocate_area(void **alloc_area)
+static void hugetlb_allocate_area(void **alloc_area, bool is_src)
{
void *area_alias = NULL;
char **alloc_area_alias;
@@ -245,7 +268,7 @@ static void hugetlb_allocate_area(void **alloc_area)
nr_pages * page_size,
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB |
- (*alloc_area == area_src ? 0 : MAP_NORESERVE),
+ (is_src ? 0 : MAP_NORESERVE),
-1,
0);
else
@@ -253,9 +276,9 @@ static void hugetlb_allocate_area(void **alloc_area)
nr_pages * page_size,
PROT_READ | PROT_WRITE,
MAP_SHARED |
- (*alloc_area == area_src ? 0 : MAP_NORESERVE),
+ (is_src ? 0 : MAP_NORESERVE),
huge_fd,
- *alloc_area == area_src ? 0 : nr_pages * page_size);
+ is_src ? 0 : nr_pages * page_size);
if (*alloc_area == MAP_FAILED)
err("mmap of hugetlbfs file failed");
@@ -265,12 +288,12 @@ static void hugetlb_allocate_area(void **alloc_area)
PROT_READ | PROT_WRITE,
MAP_SHARED,
huge_fd,
- *alloc_area == area_src ? 0 : nr_pages * page_size);
+ is_src ? 0 : nr_pages * page_size);
if (area_alias == MAP_FAILED)
err("mmap of hugetlb file alias failed");
}
- if (*alloc_area == area_src) {
+ if (is_src) {
alloc_area_alias = &area_src_alias;
} else {
alloc_area_alias = &area_dst_alias;
@@ -293,21 +316,36 @@ static void shmem_release_pages(char *rel_area)
err("madvise(MADV_REMOVE) failed");
}
-static void shmem_allocate_area(void **alloc_area)
+static void shmem_allocate_area(void **alloc_area, bool is_src)
{
void *area_alias = NULL;
- bool is_src = alloc_area == (void **)&area_src;
- unsigned long offset = is_src ? 0 : nr_pages * page_size;
+ size_t bytes = nr_pages * page_size;
+ unsigned long offset = is_src ? 0 : bytes;
+ char *p = NULL, *p_alias = NULL;
+
+ if (test_collapse) {
+ p = BASE_PMD_ADDR;
+ if (!is_src)
+ /* src map + alias + interleaved hpages */
+ p += 2 * (bytes + hpage_size);
+ p_alias = p;
+ p_alias += bytes;
+ p_alias += hpage_size; /* Prevent src/dst VMA merge */
+ }
- *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
- MAP_SHARED, shm_fd, offset);
+ *alloc_area = mmap(p, bytes, PROT_READ | PROT_WRITE, MAP_SHARED,
+ shm_fd, offset);
if (*alloc_area == MAP_FAILED)
err("mmap of memfd failed");
+ if (test_collapse && *alloc_area != p)
+ err("mmap of memfd failed at %p", p);
- area_alias = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
- MAP_SHARED, shm_fd, offset);
+ area_alias = mmap(p_alias, bytes, PROT_READ | PROT_WRITE, MAP_SHARED,
+ shm_fd, offset);
if (area_alias == MAP_FAILED)
err("mmap of memfd alias failed");
+ if (test_collapse && area_alias != p_alias)
+ err("mmap of anonymous memory failed at %p", p_alias);
if (is_src)
area_src_alias = area_alias;
@@ -320,28 +358,39 @@ static void shmem_alias_mapping(__u64 *start, size_t len, unsigned long offset)
*start = (unsigned long)area_dst_alias + offset;
}
+static void shmem_check_pmd_mapping(void *p, int expect_nr_hpages)
+{
+ if (!check_huge_shmem(area_dst_alias, expect_nr_hpages, hpage_size))
+ err("Did not find expected %d number of hugepages",
+ expect_nr_hpages);
+}
+
struct uffd_test_ops {
- void (*allocate_area)(void **alloc_area);
+ void (*allocate_area)(void **alloc_area, bool is_src);
void (*release_pages)(char *rel_area);
void (*alias_mapping)(__u64 *start, size_t len, unsigned long offset);
+ void (*check_pmd_mapping)(void *p, int expect_nr_hpages);
};
static struct uffd_test_ops anon_uffd_test_ops = {
.allocate_area = anon_allocate_area,
.release_pages = anon_release_pages,
.alias_mapping = noop_alias_mapping,
+ .check_pmd_mapping = NULL,
};
static struct uffd_test_ops shmem_uffd_test_ops = {
.allocate_area = shmem_allocate_area,
.release_pages = shmem_release_pages,
.alias_mapping = shmem_alias_mapping,
+ .check_pmd_mapping = shmem_check_pmd_mapping,
};
static struct uffd_test_ops hugetlb_uffd_test_ops = {
.allocate_area = hugetlb_allocate_area,
.release_pages = hugetlb_release_pages,
.alias_mapping = hugetlb_alias_mapping,
+ .check_pmd_mapping = NULL,
};
static struct uffd_test_ops *uffd_test_ops;
@@ -383,13 +432,34 @@ static void assert_expected_ioctls_present(uint64_t mode, uint64_t ioctls)
}
}
+static int __userfaultfd_open_dev(void)
+{
+ int fd, _uffd;
+
+ fd = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC);
+ if (fd < 0)
+ errexit(KSFT_SKIP, "opening /dev/userfaultfd failed");
+
+ _uffd = ioctl(fd, USERFAULTFD_IOC_NEW, UFFD_FLAGS);
+ if (_uffd < 0)
+ errexit(errno == ENOTTY ? KSFT_SKIP : 1,
+ "creating userfaultfd failed");
+ close(fd);
+ return _uffd;
+}
+
static void userfaultfd_open(uint64_t *features)
{
struct uffdio_api uffdio_api;
- uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY);
- if (uffd < 0)
- err("userfaultfd syscall not available in this kernel");
+ if (test_dev_userfaultfd)
+ uffd = __userfaultfd_open_dev();
+ else {
+ uffd = syscall(__NR_userfaultfd, UFFD_FLAGS);
+ if (uffd < 0)
+ errexit(errno == ENOSYS ? KSFT_SKIP : 1,
+ "creating userfaultfd failed");
+ }
uffd_flags = fcntl(uffd, F_GETFD, NULL);
uffdio_api.api = UFFD_API;
@@ -440,6 +510,7 @@ static void uffd_test_ctx_clear(void)
munmap_area((void **)&area_src_alias);
munmap_area((void **)&area_dst);
munmap_area((void **)&area_dst_alias);
+ munmap_area((void **)&area_remap);
}
static void uffd_test_ctx_init(uint64_t features)
@@ -448,8 +519,8 @@ static void uffd_test_ctx_init(uint64_t features)
uffd_test_ctx_clear();
- uffd_test_ops->allocate_area((void **)&area_src);
- uffd_test_ops->allocate_area((void **)&area_dst);
+ uffd_test_ops->allocate_area((void **)&area_src, true);
+ uffd_test_ops->allocate_area((void **)&area_dst, false);
userfaultfd_open(&features);
@@ -766,6 +837,7 @@ static void *uffd_poll_thread(void *arg)
err("remove failure");
break;
case UFFD_EVENT_REMAP:
+ area_remap = area_dst; /* save for later unmap */
area_dst = (char *)(unsigned long)msg.arg.remap.to;
break;
}
@@ -1218,13 +1290,30 @@ static int userfaultfd_sig_test(void)
return userfaults != 0;
}
+void check_memory_contents(char *p)
+{
+ unsigned long i;
+ uint8_t expected_byte;
+ void *expected_page;
+
+ if (posix_memalign(&expected_page, page_size, page_size))
+ err("out of memory");
+
+ for (i = 0; i < nr_pages; ++i) {
+ expected_byte = ~((uint8_t)(i % ((uint8_t)-1)));
+ memset(expected_page, expected_byte, page_size);
+ if (my_bcmp(expected_page, p + (i * page_size), page_size))
+ err("unexpected page contents after minor fault");
+ }
+
+ free(expected_page);
+}
+
static int userfaultfd_minor_test(void)
{
- struct uffdio_register uffdio_register;
unsigned long p;
+ struct uffdio_register uffdio_register;
pthread_t uffd_mon;
- uint8_t expected_byte;
- void *expected_page;
char c;
struct uffd_stats stats = { 0 };
@@ -1263,17 +1352,7 @@ static int userfaultfd_minor_test(void)
* fault. uffd_poll_thread will resolve the fault by bit-flipping the
* page's contents, and then issuing a CONTINUE ioctl.
*/
-
- if (posix_memalign(&expected_page, page_size, page_size))
- err("out of memory");
-
- for (p = 0; p < nr_pages; ++p) {
- expected_byte = ~((uint8_t)(p % ((uint8_t)-1)));
- memset(expected_page, expected_byte, page_size);
- if (my_bcmp(expected_page, area_dst_alias + (p * page_size),
- page_size))
- err("unexpected page contents after minor fault");
- }
+ check_memory_contents(area_dst_alias);
if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
err("pipe write");
@@ -1282,6 +1361,23 @@ static int userfaultfd_minor_test(void)
uffd_stats_report(&stats, 1);
+ if (test_collapse) {
+ printf("testing collapse of uffd memory into PMD-mapped THPs:");
+ if (madvise(area_dst_alias, nr_pages * page_size,
+ MADV_COLLAPSE))
+ err("madvise(MADV_COLLAPSE)");
+
+ uffd_test_ops->check_pmd_mapping(area_dst,
+ nr_pages * page_size /
+ hpage_size);
+ /*
+ * This won't cause uffd-fault - it purely just makes sure there
+ * was no corruption.
+ */
+ check_memory_contents(area_dst_alias);
+ printf(" done.\n");
+ }
+
return stats.missing_faults != 0 || stats.minor_faults != nr_pages;
}
@@ -1584,8 +1680,6 @@ unsigned long default_huge_page_size(void)
static void set_test_type(const char *type)
{
- uint64_t features = UFFD_API_FEATURES;
-
if (!strcmp(type, "anon")) {
test_type = TEST_ANON;
uffd_test_ops = &anon_uffd_test_ops;
@@ -1603,12 +1697,37 @@ static void set_test_type(const char *type)
test_type = TEST_SHMEM;
uffd_test_ops = &shmem_uffd_test_ops;
test_uffdio_minor = true;
- } else {
- err("Unknown test type: %s", type);
}
+}
+
+static void parse_test_type_arg(const char *raw_type)
+{
+ char *buf = strdup(raw_type);
+ uint64_t features = UFFD_API_FEATURES;
+
+ while (buf) {
+ const char *token = strsep(&buf, ":");
+
+ if (!test_type)
+ set_test_type(token);
+ else if (!strcmp(token, "dev"))
+ test_dev_userfaultfd = true;
+ else if (!strcmp(token, "syscall"))
+ test_dev_userfaultfd = false;
+ else if (!strcmp(token, "collapse"))
+ test_collapse = true;
+ else
+ err("unrecognized test mod '%s'", token);
+ }
+
+ if (!test_type)
+ err("failed to parse test type argument: '%s'", raw_type);
+
+ if (test_collapse && test_type != TEST_SHMEM)
+ err("Unsupported test: %s", raw_type);
if (test_type == TEST_HUGETLB)
- page_size = default_huge_page_size();
+ page_size = hpage_size;
else
page_size = sysconf(_SC_PAGE_SIZE);
@@ -1646,6 +1765,8 @@ static void sigalrm(int sig)
int main(int argc, char **argv)
{
+ size_t bytes;
+
if (argc < 4)
usage();
@@ -1653,11 +1774,41 @@ int main(int argc, char **argv)
err("failed to arm SIGALRM");
alarm(ALARM_INTERVAL_SECS);
- set_test_type(argv[1]);
+ hpage_size = default_huge_page_size();
+ parse_test_type_arg(argv[1]);
+ bytes = atol(argv[2]) * 1024 * 1024;
+
+ if (test_collapse && bytes & (hpage_size - 1))
+ err("MiB must be multiple of %lu if :collapse mod set",
+ hpage_size >> 20);
nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
- nr_pages_per_cpu = atol(argv[2]) * 1024*1024 / page_size /
- nr_cpus;
+
+ if (test_collapse) {
+ /* nr_cpus must divide (bytes / page_size), otherwise,
+ * area allocations of (nr_pages * paze_size) won't be a
+ * multiple of hpage_size, even if bytes is a multiple of
+ * hpage_size.
+ *
+ * This means that nr_cpus must divide (N * (2 << (H-P))
+ * where:
+ * bytes = hpage_size * N
+ * hpage_size = 2 << H
+ * page_size = 2 << P
+ *
+ * And we want to chose nr_cpus to be the largest value
+ * satisfying this constraint, not larger than the number
+ * of online CPUs. Unfortunately, prime factorization of
+ * N and nr_cpus may be arbitrary, so have to search for it.
+ * Instead, just use the highest power of 2 dividing both
+ * nr_cpus and (bytes / page_size).
+ */
+ int x = factor_of_2(nr_cpus);
+ int y = factor_of_2(bytes / page_size);
+
+ nr_cpus = x < y ? x : y;
+ }
+ nr_pages_per_cpu = bytes / page_size / nr_cpus;
if (!nr_pages_per_cpu) {
_err("invalid MiB");
usage();
diff --git a/tools/testing/selftests/vm/vm_util.c b/tools/testing/selftests/vm/vm_util.c
index b58ab11a7a30..f11f8adda521 100644
--- a/tools/testing/selftests/vm/vm_util.c
+++ b/tools/testing/selftests/vm/vm_util.c
@@ -42,9 +42,9 @@ void clear_softdirty(void)
ksft_exit_fail_msg("writing clear_refs failed\n");
}
-static bool check_for_pattern(FILE *fp, const char *pattern, char *buf)
+bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len)
{
- while (fgets(buf, MAX_LINE_LENGTH, fp) != NULL) {
+ while (fgets(buf, len, fp)) {
if (!strncmp(buf, pattern, strlen(pattern)))
return true;
}
@@ -72,9 +72,10 @@ uint64_t read_pmd_pagesize(void)
return strtoul(buf, NULL, 10);
}
-uint64_t check_huge(void *addr)
+bool __check_huge(void *addr, char *pattern, int nr_hpages,
+ uint64_t hpage_size)
{
- uint64_t thp = 0;
+ uint64_t thp = -1;
int ret;
FILE *fp;
char buffer[MAX_LINE_LENGTH];
@@ -89,20 +90,37 @@ uint64_t check_huge(void *addr)
if (!fp)
ksft_exit_fail_msg("%s: Failed to open file %s\n", __func__, SMAP_FILE_PATH);
- if (!check_for_pattern(fp, addr_pattern, buffer))
+ if (!check_for_pattern(fp, addr_pattern, buffer, sizeof(buffer)))
goto err_out;
/*
- * Fetch the AnonHugePages: in the same block and check the number of
+ * Fetch the pattern in the same block and check the number of
* hugepages.
*/
- if (!check_for_pattern(fp, "AnonHugePages:", buffer))
+ if (!check_for_pattern(fp, pattern, buffer, sizeof(buffer)))
goto err_out;
- if (sscanf(buffer, "AnonHugePages:%10ld kB", &thp) != 1)
+ snprintf(addr_pattern, MAX_LINE_LENGTH, "%s%%9ld kB", pattern);
+
+ if (sscanf(buffer, addr_pattern, &thp) != 1)
ksft_exit_fail_msg("Reading smap error\n");
err_out:
fclose(fp);
- return thp;
+ return thp == (nr_hpages * (hpage_size >> 10));
+}
+
+bool check_huge_anon(void *addr, int nr_hpages, uint64_t hpage_size)
+{
+ return __check_huge(addr, "AnonHugePages: ", nr_hpages, hpage_size);
+}
+
+bool check_huge_file(void *addr, int nr_hpages, uint64_t hpage_size)
+{
+ return __check_huge(addr, "FilePmdMapped:", nr_hpages, hpage_size);
+}
+
+bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size)
+{
+ return __check_huge(addr, "ShmemPmdMapped:", nr_hpages, hpage_size);
}
diff --git a/tools/testing/selftests/vm/vm_util.h b/tools/testing/selftests/vm/vm_util.h
index 2e512bd57ae1..5c35de454e08 100644
--- a/tools/testing/selftests/vm/vm_util.h
+++ b/tools/testing/selftests/vm/vm_util.h
@@ -5,5 +5,8 @@
uint64_t pagemap_get_entry(int fd, char *start);
bool pagemap_is_softdirty(int fd, char *start);
void clear_softdirty(void);
+bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len);
uint64_t read_pmd_pagesize(void);
-uint64_t check_huge(void *addr);
+bool check_huge_anon(void *addr, int nr_hpages, uint64_t hpage_size);
+bool check_huge_file(void *addr, int nr_hpages, uint64_t hpage_size);
+bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size);