Diffstat (limited to 'tools/testing/selftests/mm')
48 files changed, 3671 insertions, 978 deletions
diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore index 8f01f4da1c0d..824266982aa3 100644 --- a/tools/testing/selftests/mm/.gitignore +++ b/tools/testing/selftests/mm/.gitignore @@ -20,6 +20,7 @@ mremap_test on-fault-limit transhuge-stress pagemap_ioctl +pfnmap *.tmp* protection_keys protection_keys_32 @@ -27,6 +28,7 @@ protection_keys_64 madv_populate uffd-stress uffd-unit-tests +uffd-wp-mremap mlock-intersect-test mlock-random-test virtual_address_range @@ -36,6 +38,9 @@ map_fixed_noreplace write_to_hugetlbfs hmm-tests memfd_secret +hugetlb_dio +pkey_sighandler_tests_32 +pkey_sighandler_tests_64 soft-dirty split_huge_page_test ksm_tests @@ -49,9 +54,9 @@ va_high_addr_switch hugetlb_fault_after_madv hugetlb_madv_vs_map mseal_test -seal_elf droppable hugetlb_dio pkey_sighandler_tests_32 pkey_sighandler_tests_64 -guard-pages +guard-regions +merge diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index 6fad5007dea1..ae6f994d3add 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -82,13 +82,14 @@ TEST_GEN_FILES += mrelease_test TEST_GEN_FILES += mremap_dontunmap TEST_GEN_FILES += mremap_test TEST_GEN_FILES += mseal_test -TEST_GEN_FILES += seal_elf TEST_GEN_FILES += on-fault-limit TEST_GEN_FILES += pagemap_ioctl +TEST_GEN_FILES += pfnmap TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += uffd-stress TEST_GEN_FILES += uffd-unit-tests +TEST_GEN_FILES += uffd-wp-mremap TEST_GEN_FILES += split_huge_page_test TEST_GEN_FILES += ksm_tests TEST_GEN_FILES += ksm_functional_tests @@ -97,7 +98,8 @@ TEST_GEN_FILES += hugetlb_fault_after_madv TEST_GEN_FILES += hugetlb_madv_vs_map TEST_GEN_FILES += hugetlb_dio TEST_GEN_FILES += droppable -TEST_GEN_FILES += guard-pages +TEST_GEN_FILES += guard-regions +TEST_GEN_FILES += merge ifneq ($(ARCH),arm64) TEST_GEN_FILES += soft-dirty @@ -159,11 +161,16 @@ $(TEST_GEN_FILES): vm_util.c thp_settings.c $(OUTPUT)/uffd-stress: uffd-common.c $(OUTPUT)/uffd-unit-tests: uffd-common.c +$(OUTPUT)/uffd-wp-mremap: uffd-common.c +$(OUTPUT)/protection_keys: pkey_util.c +$(OUTPUT)/pkey_sighandler_tests: pkey_util.c ifeq ($(ARCH),x86_64) BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32)) BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64)) +$(BINARIES_32) $(BINARIES_64): pkey_util.c + define gen-target-rule-32 $(1) $(1)_32: $(OUTPUT)/$(1)_32 .PHONY: $(1) $(1)_32 diff --git a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh index 67df7b47087f..e1fe16bcbbe8 100755 --- a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh +++ b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh @@ -29,7 +29,7 @@ fi if [[ $cgroup2 ]]; then cgroup_path=$(mount -t cgroup2 | head -1 | awk '{print $3}') if [[ -z "$cgroup_path" ]]; then - cgroup_path=/dev/cgroup/memory + cgroup_path=$(mktemp -d) mount -t cgroup2 none $cgroup_path do_umount=1 fi @@ -37,7 +37,7 @@ if [[ $cgroup2 ]]; then else cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}') if [[ -z "$cgroup_path" ]]; then - cgroup_path=/dev/cgroup/memory + cgroup_path=$(mktemp -d) mount -t cgroup memory,hugetlb $cgroup_path do_umount=1 fi diff --git a/tools/testing/selftests/mm/compaction_test.c b/tools/testing/selftests/mm/compaction_test.c index 2c3a0eb6b22d..9bc4591c7b16 100644 --- a/tools/testing/selftests/mm/compaction_test.c +++ b/tools/testing/selftests/mm/compaction_test.c @@ -90,6 +90,8 @@ int 
check_compaction(unsigned long mem_free, unsigned long hugepage_size, int compaction_index = 0; char nr_hugepages[20] = {0}; char init_nr_hugepages[24] = {0}; + char target_nr_hugepages[24] = {0}; + int slen; snprintf(init_nr_hugepages, sizeof(init_nr_hugepages), "%lu", initial_nr_hugepages); @@ -106,11 +108,18 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size, goto out; } - /* Request a large number of huge pages. The Kernel will allocate - as much as it can */ - if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) { - ksft_print_msg("Failed to write 100000 to /proc/sys/vm/nr_hugepages: %s\n", - strerror(errno)); + /* + * Request huge pages for about half of the free memory. The Kernel + * will allocate as much as it can, and we expect it will get at least 1/3 + */ + nr_hugepages_ul = mem_free / hugepage_size / 2; + snprintf(target_nr_hugepages, sizeof(target_nr_hugepages), + "%lu", nr_hugepages_ul); + + slen = strlen(target_nr_hugepages); + if (write(fd, target_nr_hugepages, slen) != slen) { + ksft_print_msg("Failed to write %lu to /proc/sys/vm/nr_hugepages: %s\n", + nr_hugepages_ul, strerror(errno)); goto close_fd; } diff --git a/tools/testing/selftests/mm/config b/tools/testing/selftests/mm/config index 4309916f629e..deba93379c80 100644 --- a/tools/testing/selftests/mm/config +++ b/tools/testing/selftests/mm/config @@ -7,3 +7,7 @@ CONFIG_TEST_HMM=m CONFIG_GUP_TEST=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_MEM_SOFT_DIRTY=y +CONFIG_ANON_VMA_NAME=y +CONFIG_FTRACE=y +CONFIG_PROFILING=y +CONFIG_UPROBES=y diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index 1238e1c5aae1..dbbcc5eb3dce 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -112,9 +112,12 @@ struct comm_pipes { static int setup_comm_pipes(struct comm_pipes *comm_pipes) { - if (pipe(comm_pipes->child_ready) < 0) + if (pipe(comm_pipes->child_ready) < 0) { + ksft_perror("pipe()"); return -errno; + } if (pipe(comm_pipes->parent_ready) < 0) { + ksft_perror("pipe()"); close(comm_pipes->child_ready[0]); close(comm_pipes->child_ready[1]); return -errno; @@ -207,13 +210,14 @@ static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect, ret = setup_comm_pipes(&comm_pipes); if (ret) { - ksft_test_result_fail("pipe() failed\n"); + log_test_result(KSFT_FAIL); return; } ret = fork(); if (ret < 0) { - ksft_test_result_fail("fork() failed\n"); + ksft_perror("fork() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } else if (!ret) { exit(fn(mem, size, &comm_pipes)); @@ -228,9 +232,18 @@ static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect, * write-faults by directly mapping pages writable. 
*/ ret = mprotect(mem, size, PROT_READ); - ret |= mprotect(mem, size, PROT_READ|PROT_WRITE); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); + write(comm_pipes.parent_ready[1], "0", 1); + wait(&ret); + goto close_comm_pipes; + } + + ret = mprotect(mem, size, PROT_READ|PROT_WRITE); + if (ret) { + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); write(comm_pipes.parent_ready[1], "0", 1); wait(&ret); goto close_comm_pipes; @@ -248,16 +261,16 @@ static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect, ret = -EINVAL; if (!ret) { - ksft_test_result_pass("No leak from parent into child\n"); + log_test_result(KSFT_PASS); } else if (xfail) { /* * With hugetlb, some vmsplice() tests are currently expected to * fail because (a) harder to fix and (b) nobody really cares. * Flag them as expected failure for now. */ - ksft_test_result_xfail("Leak from parent into child\n"); + log_test_result(KSFT_XFAIL); } else { - ksft_test_result_fail("Leak from parent into child\n"); + log_test_result(KSFT_FAIL); } close_comm_pipes: close_comm_pipes(&comm_pipes); @@ -293,7 +306,7 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, .iov_base = mem, .iov_len = size, }; - ssize_t cur, total, transferred; + ssize_t cur, total, transferred = 0; struct comm_pipes comm_pipes; char *old, *new; int ret, fds[2]; @@ -306,26 +319,29 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, ret = setup_comm_pipes(&comm_pipes); if (ret) { - ksft_test_result_fail("pipe() failed\n"); + log_test_result(KSFT_FAIL); goto free; } if (pipe(fds) < 0) { - ksft_test_result_fail("pipe() failed\n"); + ksft_perror("pipe() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } if (before_fork) { transferred = vmsplice(fds[1], &iov, 1, 0); if (transferred <= 0) { - ksft_test_result_fail("vmsplice() failed\n"); + ksft_print_msg("vmsplice() failed\n"); + log_test_result(KSFT_FAIL); goto close_pipe; } } ret = fork(); if (ret < 0) { - ksft_test_result_fail("fork() failed\n"); + ksft_perror("fork() failed\n"); + log_test_result(KSFT_FAIL); goto close_pipe; } else if (!ret) { write(comm_pipes.child_ready[1], "0", 1); @@ -339,7 +355,8 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, if (!before_fork) { transferred = vmsplice(fds[1], &iov, 1, 0); if (transferred <= 0) { - ksft_test_result_fail("vmsplice() failed\n"); + ksft_perror("vmsplice() failed"); + log_test_result(KSFT_FAIL); wait(&ret); goto close_pipe; } @@ -348,7 +365,8 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, while (read(comm_pipes.child_ready[0], &buf, 1) != 1) ; if (munmap(mem, size) < 0) { - ksft_test_result_fail("munmap() failed\n"); + ksft_perror("munmap() failed"); + log_test_result(KSFT_FAIL); goto close_pipe; } write(comm_pipes.parent_ready[1], "0", 1); @@ -356,7 +374,8 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, /* Wait until the child is done writing. 
*/ wait(&ret); if (!WIFEXITED(ret)) { - ksft_test_result_fail("wait() failed\n"); + ksft_perror("wait() failed"); + log_test_result(KSFT_FAIL); goto close_pipe; } @@ -364,22 +383,23 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, for (total = 0; total < transferred; total += cur) { cur = read(fds[0], new + total, transferred - total); if (cur < 0) { - ksft_test_result_fail("read() failed\n"); + ksft_perror("read() failed"); + log_test_result(KSFT_FAIL); goto close_pipe; } } if (!memcmp(old, new, transferred)) { - ksft_test_result_pass("No leak from child into parent\n"); + log_test_result(KSFT_PASS); } else if (xfail) { /* * With hugetlb, some vmsplice() tests are currently expected to * fail because (a) harder to fix and (b) nobody really cares. * Flag them as expected failure for now. */ - ksft_test_result_xfail("Leak from child into parent\n"); + log_test_result(KSFT_XFAIL); } else { - ksft_test_result_fail("Leak from child into parent\n"); + log_test_result(KSFT_FAIL); } close_pipe: close(fds[0]); @@ -416,13 +436,14 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) ret = setup_comm_pipes(&comm_pipes); if (ret) { - ksft_test_result_fail("pipe() failed\n"); + log_test_result(KSFT_FAIL); return; } file = tmpfile(); if (!file) { - ksft_test_result_fail("tmpfile() failed\n"); + ksft_perror("tmpfile() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } fd = fileno(file); @@ -430,14 +451,16 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) tmp = malloc(size); if (!tmp) { - ksft_test_result_fail("malloc() failed\n"); + ksft_print_msg("malloc() failed\n"); + log_test_result(KSFT_FAIL); goto close_file; } /* Skip on errors, as we might just lack kernel support. */ ret = io_uring_queue_init(1, &ring, 0); if (ret < 0) { - ksft_test_result_skip("io_uring_queue_init() failed\n"); + ksft_print_msg("io_uring_queue_init() failed\n"); + log_test_result(KSFT_SKIP); goto free_tmp; } @@ -452,7 +475,8 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) iov.iov_len = size; ret = io_uring_register_buffers(&ring, &iov, 1); if (ret) { - ksft_test_result_skip("io_uring_register_buffers() failed\n"); + ksft_print_msg("io_uring_register_buffers() failed\n"); + log_test_result(KSFT_SKIP); goto queue_exit; } @@ -463,7 +487,8 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) */ ret = fork(); if (ret < 0) { - ksft_test_result_fail("fork() failed\n"); + ksft_perror("fork() failed"); + log_test_result(KSFT_FAIL); goto unregister_buffers; } else if (!ret) { write(comm_pipes.child_ready[1], "0", 1); @@ -483,10 +508,17 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) * if the page is mapped R/O vs. R/W). 
*/ ret = mprotect(mem, size, PROT_READ); + if (ret) { + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); + goto unregister_buffers; + } + clear_softdirty(); - ret |= mprotect(mem, size, PROT_READ | PROT_WRITE); + ret = mprotect(mem, size, PROT_READ | PROT_WRITE); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); goto unregister_buffers; } } @@ -498,25 +530,29 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) memset(mem, 0xff, size); sqe = io_uring_get_sqe(&ring); if (!sqe) { - ksft_test_result_fail("io_uring_get_sqe() failed\n"); + ksft_print_msg("io_uring_get_sqe() failed\n"); + log_test_result(KSFT_FAIL); goto quit_child; } io_uring_prep_write_fixed(sqe, fd, mem, size, 0, 0); ret = io_uring_submit(&ring); if (ret < 0) { - ksft_test_result_fail("io_uring_submit() failed\n"); + ksft_print_msg("io_uring_submit() failed\n"); + log_test_result(KSFT_FAIL); goto quit_child; } ret = io_uring_wait_cqe(&ring, &cqe); if (ret < 0) { - ksft_test_result_fail("io_uring_wait_cqe() failed\n"); + ksft_print_msg("io_uring_wait_cqe() failed\n"); + log_test_result(KSFT_FAIL); goto quit_child; } if (cqe->res != size) { - ksft_test_result_fail("write_fixed failed\n"); + ksft_print_msg("write_fixed failed\n"); + log_test_result(KSFT_FAIL); goto quit_child; } io_uring_cqe_seen(&ring, cqe); @@ -526,15 +562,18 @@ static void do_test_iouring(char *mem, size_t size, bool use_fork) while (total < size) { cur = pread(fd, tmp + total, size - total, total); if (cur < 0) { - ksft_test_result_fail("pread() failed\n"); + ksft_print_msg("pread() failed\n"); + log_test_result(KSFT_FAIL); goto quit_child; } total += cur; } /* Finally, check if we read what we expected. */ - ksft_test_result(!memcmp(mem, tmp, size), - "Longterm R/W pin is reliable\n"); + if (!memcmp(mem, tmp, size)) + log_test_result(KSFT_PASS); + else + log_test_result(KSFT_FAIL); quit_child: if (use_fork) { @@ -582,19 +621,21 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, int ret; if (gup_fd < 0) { - ksft_test_result_skip("gup_test not available\n"); + ksft_print_msg("gup_test not available\n"); + log_test_result(KSFT_SKIP); return; } tmp = malloc(size); if (!tmp) { - ksft_test_result_fail("malloc() failed\n"); + ksft_print_msg("malloc() failed\n"); + log_test_result(KSFT_FAIL); return; } ret = setup_comm_pipes(&comm_pipes); if (ret) { - ksft_test_result_fail("pipe() failed\n"); + log_test_result(KSFT_FAIL); goto free_tmp; } @@ -609,7 +650,8 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, */ ret = fork(); if (ret < 0) { - ksft_test_result_fail("fork() failed\n"); + ksft_perror("fork() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } else if (!ret) { write(comm_pipes.child_ready[1], "0", 1); @@ -646,7 +688,8 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, clear_softdirty(); ret |= mprotect(mem, size, PROT_READ | PROT_WRITE); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } break; @@ -661,9 +704,11 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args); if (ret) { if (errno == EINVAL) - ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n"); + ret = KSFT_SKIP; else - ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n"); + ret = KSFT_FAIL; + 
ksft_perror("PIN_LONGTERM_TEST_START failed"); + log_test_result(ret); goto wait; } @@ -676,22 +721,26 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test, */ tmp_val = (__u64)(uintptr_t)tmp; ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val); - if (ret) - ksft_test_result_fail("PIN_LONGTERM_TEST_READ failed\n"); - else - ksft_test_result(!memcmp(mem, tmp, size), - "Longterm R/O pin is reliable\n"); + if (ret) { + ksft_perror("PIN_LONGTERM_TEST_READ failed"); + log_test_result(KSFT_FAIL); + } else { + if (!memcmp(mem, tmp, size)) + log_test_result(KSFT_PASS); + else + log_test_result(KSFT_FAIL); + } ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP); if (ret) - ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n"); + ksft_perror("PIN_LONGTERM_TEST_STOP failed"); wait: switch (test) { case RO_PIN_TEST_SHARED: write(comm_pipes.parent_ready[1], "0", 1); wait(&ret); if (!WIFEXITED(ret)) - ksft_print_msg("[INFO] wait() failed\n"); + ksft_perror("wait() failed"); break; default: break; @@ -746,14 +795,16 @@ static void do_run_with_base_page(test_fn fn, bool swapout) mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); return; } ret = madvise(mem, pagesize, MADV_NOHUGEPAGE); /* Ignore if not around on a kernel. */ if (ret && errno != EINVAL) { - ksft_test_result_fail("MADV_NOHUGEPAGE failed\n"); + ksft_perror("MADV_NOHUGEPAGE failed"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -763,7 +814,8 @@ static void do_run_with_base_page(test_fn fn, bool swapout) if (swapout) { madvise(mem, pagesize, MADV_PAGEOUT); if (!pagemap_is_swapped(pagemap_fd, mem)) { - ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n"); + ksft_print_msg("MADV_PAGEOUT did not work, is swap enabled?\n"); + log_test_result(KSFT_SKIP); goto munmap; } } @@ -775,13 +827,13 @@ munmap: static void run_with_base_page(test_fn fn, const char *desc) { - ksft_print_msg("[RUN] %s ... with base page\n", desc); + log_test_start("%s ... with base page", desc); do_run_with_base_page(fn, false); } static void run_with_base_page_swap(test_fn fn, const char *desc) { - ksft_print_msg("[RUN] %s ... with swapped out base page\n", desc); + log_test_start("%s ... 
with swapped out base page", desc); do_run_with_base_page(fn, true); } @@ -807,7 +859,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (mmap_mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); return; } @@ -816,7 +869,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) ret = madvise(mem, thpsize, MADV_HUGEPAGE); if (ret) { - ksft_test_result_fail("MADV_HUGEPAGE failed\n"); + ksft_perror("MADV_HUGEPAGE failed"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -826,7 +880,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) */ mem[0] = 1; if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) { - ksft_test_result_skip("Did not get a THP populated\n"); + ksft_print_msg("Did not get a THP populated\n"); + log_test_result(KSFT_SKIP); goto munmap; } memset(mem, 1, thpsize); @@ -846,12 +901,14 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) */ ret = mprotect(mem + pagesize, pagesize, PROT_READ); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); goto munmap; } ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); goto munmap; } break; @@ -863,7 +920,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) */ ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTNEED); if (ret) { - ksft_test_result_fail("MADV_DONTNEED failed\n"); + ksft_perror("MADV_DONTNEED failed"); + log_test_result(KSFT_FAIL); goto munmap; } size = pagesize; @@ -876,14 +934,16 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) mremap_size = thpsize / 2; mremap_mem = mmap(NULL, mremap_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + if (mremap_mem == MAP_FAILED) { + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto munmap; } tmp = mremap(mem + mremap_size, mremap_size, mremap_size, MREMAP_MAYMOVE | MREMAP_FIXED, mremap_mem); if (tmp != mremap_mem) { - ksft_test_result_fail("mremap() failed\n"); + ksft_perror("mremap() failed"); + log_test_result(KSFT_FAIL); goto munmap; } size = mremap_size; @@ -896,12 +956,14 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) */ ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTFORK); if (ret) { - ksft_test_result_fail("MADV_DONTFORK failed\n"); + ksft_perror("MADV_DONTFORK failed"); + log_test_result(KSFT_FAIL); goto munmap; } ret = fork(); if (ret < 0) { - ksft_test_result_fail("fork() failed\n"); + ksft_perror("fork() failed"); + log_test_result(KSFT_FAIL); goto munmap; } else if (!ret) { exit(0); @@ -910,7 +972,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) /* Allow for sharing all pages again. 
*/ ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DOFORK); if (ret) { - ksft_test_result_fail("MADV_DOFORK failed\n"); + ksft_perror("MADV_DOFORK failed"); + log_test_result(KSFT_FAIL); goto munmap; } break; @@ -924,7 +987,8 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize) case THP_RUN_SINGLE_PTE_SWAPOUT: madvise(mem, size, MADV_PAGEOUT); if (!range_is_swapped(mem, size)) { - ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n"); + ksft_print_msg("MADV_PAGEOUT did not work, is swap enabled?\n"); + log_test_result(KSFT_SKIP); goto munmap; } break; @@ -941,56 +1005,56 @@ munmap: static void run_with_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with THP (%zu kB)\n", + log_test_start("%s ... with THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_PMD, size); } static void run_with_thp_swap(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with swapped-out THP (%zu kB)\n", + log_test_start("%s ... with swapped-out THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT, size); } static void run_with_pte_mapped_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with PTE-mapped THP (%zu kB)\n", + log_test_start("%s ... with PTE-mapped THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_PTE, size); } static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with swapped-out, PTE-mapped THP (%zu kB)\n", + log_test_start("%s ... with swapped-out, PTE-mapped THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT, size); } static void run_with_single_pte_of_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with single PTE of THP (%zu kB)\n", + log_test_start("%s ... with single PTE of THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_SINGLE_PTE, size); } static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with single PTE of swapped-out THP (%zu kB)\n", + log_test_start("%s ... with single PTE of swapped-out THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT, size); } static void run_with_partial_mremap_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with partially mremap()'ed THP (%zu kB)\n", + log_test_start("%s ... with partially mremap()'ed THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP, size); } static void run_with_partial_shared_thp(test_fn fn, const char *desc, size_t size) { - ksft_print_msg("[RUN] %s ... with partially shared THP (%zu kB)\n", + log_test_start("%s ... with partially shared THP (%zu kB)", desc, size / 1024); do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED, size); } @@ -1000,14 +1064,15 @@ static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize) int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB; char *mem, *dummy; - ksft_print_msg("[RUN] %s ... with hugetlb (%zu kB)\n", desc, + log_test_start("%s ... 
with hugetlb (%zu kB)", desc, hugetlbsize / 1024); flags |= __builtin_ctzll(hugetlbsize) << MAP_HUGE_SHIFT; mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0); if (mem == MAP_FAILED) { - ksft_test_result_skip("need more free huge pages\n"); + ksft_perror("need more free huge pages"); + log_test_result(KSFT_SKIP); return; } @@ -1020,7 +1085,8 @@ static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize) */ dummy = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0); if (dummy == MAP_FAILED) { - ksft_test_result_skip("need more free huge pages\n"); + ksft_perror("need more free huge pages"); + log_test_result(KSFT_SKIP); goto munmap; } munmap(dummy, hugetlbsize); @@ -1226,7 +1292,7 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, ret = setup_comm_pipes(&comm_pipes); if (ret) { - ksft_test_result_fail("pipe() failed\n"); + log_test_result(KSFT_FAIL); return; } @@ -1236,12 +1302,14 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, */ ret = mprotect(mem + pagesize, pagesize, PROT_READ); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_perror("mprotect() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } @@ -1250,8 +1318,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, /* Collapse before actually COW-sharing the page. */ ret = madvise(mem, size, MADV_COLLAPSE); if (ret) { - ksft_test_result_skip("MADV_COLLAPSE failed: %s\n", - strerror(errno)); + ksft_perror("MADV_COLLAPSE failed"); + log_test_result(KSFT_SKIP); goto close_comm_pipes; } break; @@ -1262,7 +1330,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, /* Don't COW-share the upper part of the THP. */ ret = madvise(mem + size / 2, size / 2, MADV_DONTFORK); if (ret) { - ksft_test_result_fail("MADV_DONTFORK failed\n"); + ksft_perror("MADV_DONTFORK failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } break; @@ -1270,7 +1339,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, /* Don't COW-share the lower part of the THP. */ ret = madvise(mem, size / 2, MADV_DONTFORK); if (ret) { - ksft_test_result_fail("MADV_DONTFORK failed\n"); + ksft_perror("MADV_DONTFORK failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } break; @@ -1280,7 +1350,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, ret = fork(); if (ret < 0) { - ksft_test_result_fail("fork() failed\n"); + ksft_perror("fork() failed"); + log_test_result(KSFT_FAIL); goto close_comm_pipes; } else if (!ret) { switch (test) { @@ -1314,7 +1385,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, */ ret = madvise(mem, size, MADV_DOFORK); if (ret) { - ksft_test_result_fail("MADV_DOFORK failed\n"); + ksft_perror("MADV_DOFORK failed"); + log_test_result(KSFT_FAIL); write(comm_pipes.parent_ready[1], "0", 1); wait(&ret); goto close_comm_pipes; @@ -1324,8 +1396,8 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, /* Collapse before anyone modified the COW-shared page. 
*/ ret = madvise(mem, size, MADV_COLLAPSE); if (ret) { - ksft_test_result_skip("MADV_COLLAPSE failed: %s\n", - strerror(errno)); + ksft_perror("MADV_COLLAPSE failed"); + log_test_result(KSFT_SKIP); write(comm_pipes.parent_ready[1], "0", 1); wait(&ret); goto close_comm_pipes; @@ -1345,7 +1417,10 @@ static void do_test_anon_thp_collapse(char *mem, size_t size, else ret = -EINVAL; - ksft_test_result(!ret, "No leak from parent into child\n"); + if (!ret) + log_test_result(KSFT_PASS); + else + log_test_result(KSFT_FAIL); close_comm_pipes: close_comm_pipes(&comm_pipes); } @@ -1430,7 +1505,7 @@ static void run_anon_thp_test_cases(void) for (i = 0; i < ARRAY_SIZE(anon_thp_test_cases); i++) { struct test_case const *test_case = &anon_thp_test_cases[i]; - ksft_print_msg("[RUN] %s\n", test_case->desc); + log_test_start("%s", test_case->desc); do_run_with_thp(test_case->fn, THP_RUN_PMD, pmdsize); } } @@ -1453,8 +1528,10 @@ static void test_cow(char *mem, const char *smem, size_t size) memset(mem, 0xff, size); /* See if we still read the old values via the other mapping. */ - ksft_test_result(!memcmp(smem, old, size), - "Other mapping not modified\n"); + if (!memcmp(smem, old, size)) + log_test_result(KSFT_PASS); + else + log_test_result(KSFT_FAIL); free(old); } @@ -1472,18 +1549,20 @@ static void run_with_zeropage(non_anon_test_fn fn, const char *desc) { char *mem, *smem, tmp; - ksft_print_msg("[RUN] %s ... with shared zeropage\n", desc); + log_test_start("%s ... with shared zeropage", desc); mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); if (mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); return; } smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0); - if (mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + if (smem == MAP_FAILED) { + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -1504,10 +1583,11 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) size_t mmap_size; int ret; - ksft_print_msg("[RUN] %s ... with huge zeropage\n", desc); + log_test_start("%s ... 
with huge zeropage", desc); if (!has_huge_zeropage) { - ksft_test_result_skip("Huge zeropage not enabled\n"); + ksft_print_msg("Huge zeropage not enabled\n"); + log_test_result(KSFT_SKIP); return; } @@ -1516,13 +1596,15 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (mmap_mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); return; } mmap_smem = mmap(NULL, mmap_size, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (mmap_smem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -1531,9 +1613,15 @@ static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc) smem = (char *)(((uintptr_t)mmap_smem + pmdsize) & ~(pmdsize - 1)); ret = madvise(mem, pmdsize, MADV_HUGEPAGE); + if (ret != 0) { + ksft_perror("madvise()"); + log_test_result(KSFT_FAIL); + goto munmap; + } ret |= madvise(smem, pmdsize, MADV_HUGEPAGE); - if (ret) { - ksft_test_result_fail("MADV_HUGEPAGE failed\n"); + if (ret != 0) { + ksft_perror("madvise()"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -1562,29 +1650,33 @@ static void run_with_memfd(non_anon_test_fn fn, const char *desc) char *mem, *smem, tmp; int fd; - ksft_print_msg("[RUN] %s ... with memfd\n", desc); + log_test_start("%s ... with memfd", desc); fd = memfd_create("test", 0); if (fd < 0) { - ksft_test_result_fail("memfd_create() failed\n"); + ksft_perror("memfd_create() failed"); + log_test_result(KSFT_FAIL); return; } /* File consists of a single page filled with zeroes. */ if (fallocate(fd, 0, 0, pagesize)) { - ksft_test_result_fail("fallocate() failed\n"); + ksft_perror("fallocate() failed"); + log_test_result(KSFT_FAIL); goto close; } /* Create a private mapping of the memfd. */ mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); if (mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto close; } smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0); - if (mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + if (smem == MAP_FAILED) { + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -1607,35 +1699,40 @@ static void run_with_tmpfile(non_anon_test_fn fn, const char *desc) FILE *file; int fd; - ksft_print_msg("[RUN] %s ... with tmpfile\n", desc); + log_test_start("%s ... with tmpfile", desc); file = tmpfile(); if (!file) { - ksft_test_result_fail("tmpfile() failed\n"); + ksft_perror("tmpfile() failed"); + log_test_result(KSFT_FAIL); return; } fd = fileno(file); if (fd < 0) { - ksft_test_result_skip("fileno() failed\n"); + ksft_perror("fileno() failed"); + log_test_result(KSFT_SKIP); return; } /* File consists of a single page filled with zeroes. */ if (fallocate(fd, 0, 0, pagesize)) { - ksft_test_result_fail("fallocate() failed\n"); + ksft_perror("fallocate() failed"); + log_test_result(KSFT_FAIL); goto close; } /* Create a private mapping of the memfd. 
*/ mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); if (mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto close; } smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0); - if (mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + if (smem == MAP_FAILED) { + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -1659,20 +1756,22 @@ static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc, char *mem, *smem, tmp; int fd; - ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc, + log_test_start("%s ... with memfd hugetlb (%zu kB)", desc, hugetlbsize / 1024); flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT; fd = memfd_create("test", flags); if (fd < 0) { - ksft_test_result_skip("memfd_create() failed\n"); + ksft_perror("memfd_create() failed"); + log_test_result(KSFT_SKIP); return; } /* File consists of a single page filled with zeroes. */ if (fallocate(fd, 0, 0, hugetlbsize)) { - ksft_test_result_skip("need more free huge pages\n"); + ksft_perror("need more free huge pages"); + log_test_result(KSFT_SKIP); goto close; } @@ -1680,12 +1779,14 @@ static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc, mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); if (mem == MAP_FAILED) { - ksft_test_result_skip("need more free huge pages\n"); + ksft_perror("need more free huge pages"); + log_test_result(KSFT_SKIP); goto close; } smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0); - if (mem == MAP_FAILED) { - ksft_test_result_fail("mmap() failed\n"); + if (smem == MAP_FAILED) { + ksft_perror("mmap() failed"); + log_test_result(KSFT_FAIL); goto munmap; } @@ -1696,7 +1797,7 @@ static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc, fn(mem, smem, hugetlbsize); munmap: munmap(mem, hugetlbsize); - if (mem != MAP_FAILED) + if (smem != MAP_FAILED) munmap(smem, hugetlbsize); close: close(fd); @@ -1771,7 +1872,6 @@ static int tests_per_non_anon_test_case(void) int main(int argc, char **argv) { - int err; struct thp_settings default_settings; ksft_print_header(); @@ -1811,9 +1911,5 @@ int main(int argc, char **argv) thp_restore_settings(); } - err = ksft_get_fail_cnt(); - if (err) - ksft_exit_fail_msg("%d out of %d tests failed\n", - err, ksft_test_num()); - ksft_exit_pass(); + ksft_finished(); } diff --git a/tools/testing/selftests/mm/guard-pages.c b/tools/testing/selftests/mm/guard-regions.c index 7cdf815d0d63..93af3d3760f9 100644 --- a/tools/testing/selftests/mm/guard-pages.c +++ b/tools/testing/selftests/mm/guard-regions.c @@ -6,7 +6,9 @@ #include <assert.h> #include <errno.h> #include <fcntl.h> +#include <linux/limits.h> #include <linux/userfaultfd.h> +#include <linux/fs.h> #include <setjmp.h> #include <signal.h> #include <stdbool.h> @@ -18,6 +20,9 @@ #include <sys/syscall.h> #include <sys/uio.h> #include <unistd.h> +#include "vm_util.h" + +#include "../pidfd/pidfd.h" /* * Ignore the checkpatch warning, as per the C99 standard, section 7.14.1.1: @@ -37,6 +42,79 @@ static sigjmp_buf signal_jmp_buf; */ #define FORCE_READ(x) (*(volatile typeof(x) *)x) +/* + * How is the test backing the mapping being tested? 
+ */ +enum backing_type { + ANON_BACKED, + SHMEM_BACKED, + LOCAL_FILE_BACKED, +}; + +FIXTURE(guard_regions) +{ + unsigned long page_size; + char path[PATH_MAX]; + int fd; +}; + +FIXTURE_VARIANT(guard_regions) +{ + enum backing_type backing; +}; + +FIXTURE_VARIANT_ADD(guard_regions, anon) +{ + .backing = ANON_BACKED, +}; + +FIXTURE_VARIANT_ADD(guard_regions, shmem) +{ + .backing = SHMEM_BACKED, +}; + +FIXTURE_VARIANT_ADD(guard_regions, file) +{ + .backing = LOCAL_FILE_BACKED, +}; + +static bool is_anon_backed(const FIXTURE_VARIANT(guard_regions) * variant) +{ + switch (variant->backing) { + case ANON_BACKED: + case SHMEM_BACKED: + return true; + default: + return false; + } +} + +static void *mmap_(FIXTURE_DATA(guard_regions) * self, + const FIXTURE_VARIANT(guard_regions) * variant, + void *addr, size_t length, int prot, int extra_flags, + off_t offset) +{ + int fd; + int flags = extra_flags; + + switch (variant->backing) { + case ANON_BACKED: + flags |= MAP_PRIVATE | MAP_ANON; + fd = -1; + break; + case SHMEM_BACKED: + case LOCAL_FILE_BACKED: + flags |= MAP_SHARED; + fd = self->fd; + break; + default: + ksft_exit_fail(); + break; + } + + return mmap(addr, length, prot, flags, fd, offset); +} + static int userfaultfd(int flags) { return syscall(SYS_userfaultfd, flags); @@ -50,9 +128,10 @@ static void handle_fatal(int c) siglongjmp(signal_jmp_buf, c); } -static int pidfd_open(pid_t pid, unsigned int flags) +static ssize_t sys_process_madvise(int pidfd, const struct iovec *iovec, + size_t n, int advice, unsigned int flags) { - return syscall(SYS_pidfd_open, pid, flags); + return syscall(__NR_process_madvise, pidfd, iovec, n, advice, flags); } /* @@ -101,12 +180,7 @@ static bool try_read_write_buf(char *ptr) return try_read_buf(ptr) && try_write_buf(ptr); } -FIXTURE(guard_pages) -{ - unsigned long page_size; -}; - -FIXTURE_SETUP(guard_pages) +static void setup_sighandler(void) { struct sigaction act = { .sa_handler = &handle_fatal, @@ -116,11 +190,9 @@ FIXTURE_SETUP(guard_pages) sigemptyset(&act.sa_mask); if (sigaction(SIGSEGV, &act, NULL)) ksft_exit_fail_perror("sigaction"); +} - self->page_size = (unsigned long)sysconf(_SC_PAGESIZE); -}; - -FIXTURE_TEARDOWN(guard_pages) +static void teardown_sighandler(void) { struct sigaction act = { .sa_handler = SIG_DFL, @@ -131,15 +203,113 @@ FIXTURE_TEARDOWN(guard_pages) sigaction(SIGSEGV, &act, NULL); } -TEST_F(guard_pages, basic) +static int open_file(const char *prefix, char *path) +{ + int fd; + + snprintf(path, PATH_MAX, "%sguard_regions_test_file_XXXXXX", prefix); + fd = mkstemp(path); + if (fd < 0) + ksft_exit_fail_perror("mkstemp"); + + return fd; +} + +/* Establish a varying pattern in a buffer. */ +static void set_pattern(char *ptr, size_t num_pages, size_t page_size) +{ + size_t i; + + for (i = 0; i < num_pages; i++) { + char *ptr2 = &ptr[i * page_size]; + + memset(ptr2, 'a' + (i % 26), page_size); + } +} + +/* + * Check that a buffer contains the pattern set by set_pattern(), starting at a + * page offset of pgoff within the buffer. + */ +static bool check_pattern_offset(char *ptr, size_t num_pages, size_t page_size, + size_t pgoff) +{ + size_t i; + + for (i = 0; i < num_pages * page_size; i++) { + size_t offset = pgoff * page_size + i; + char actual = ptr[offset]; + char expected = 'a' + ((offset / page_size) % 26); + + if (actual != expected) + return false; + } + + return true; +} + +/* Check that a buffer contains the pattern set by set_pattern(). 
*/ +static bool check_pattern(char *ptr, size_t num_pages, size_t page_size) +{ + return check_pattern_offset(ptr, num_pages, page_size, 0); +} + +/* Determine if a buffer contains only repetitions of a specified char. */ +static bool is_buf_eq(char *buf, size_t size, char chr) +{ + size_t i; + + for (i = 0; i < size; i++) { + if (buf[i] != chr) + return false; + } + + return true; +} + +FIXTURE_SETUP(guard_regions) +{ + self->page_size = (unsigned long)sysconf(_SC_PAGESIZE); + setup_sighandler(); + + switch (variant->backing) { + case ANON_BACKED: + return; + case LOCAL_FILE_BACKED: + self->fd = open_file("", self->path); + break; + case SHMEM_BACKED: + self->fd = memfd_create(self->path, 0); + break; + } + + /* We truncate file to at least 100 pages, tests can modify as needed. */ + ASSERT_EQ(ftruncate(self->fd, 100 * self->page_size), 0); +}; + +FIXTURE_TEARDOWN_PARENT(guard_regions) +{ + teardown_sighandler(); + + if (variant->backing == ANON_BACKED) + return; + + if (self->fd >= 0) + close(self->fd); + + if (self->path[0] != '\0') + unlink(self->path); +} + +TEST_F(guard_regions, basic) { const unsigned long NUM_PAGES = 10; const unsigned long page_size = self->page_size; char *ptr; int i; - ptr = mmap(NULL, NUM_PAGES * page_size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANON, -1, 0); + ptr = mmap_(self, variant, NULL, NUM_PAGES * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Trivially assert we can touch the first page. */ @@ -225,32 +395,30 @@ TEST_F(guard_pages, basic) } /* Assert that operations applied across multiple VMAs work as expected. */ -TEST_F(guard_pages, multi_vma) +TEST_F(guard_regions, multi_vma) { const unsigned long page_size = self->page_size; char *ptr_region, *ptr, *ptr1, *ptr2, *ptr3; int i; /* Reserve a 100 page region over which we can install VMAs. */ - ptr_region = mmap(NULL, 100 * page_size, PROT_NONE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr_region = mmap_(self, variant, NULL, 100 * page_size, + PROT_NONE, 0, 0); ASSERT_NE(ptr_region, MAP_FAILED); /* Place a VMA of 10 pages size at the start of the region. */ - ptr1 = mmap(ptr_region, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ptr1 = mmap_(self, variant, ptr_region, 10 * page_size, + PROT_READ | PROT_WRITE, MAP_FIXED, 0); ASSERT_NE(ptr1, MAP_FAILED); /* Place a VMA of 5 pages size 50 pages into the region. */ - ptr2 = mmap(&ptr_region[50 * page_size], 5 * page_size, - PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ptr2 = mmap_(self, variant, &ptr_region[50 * page_size], 5 * page_size, + PROT_READ | PROT_WRITE, MAP_FIXED, 0); ASSERT_NE(ptr2, MAP_FAILED); /* Place a VMA of 20 pages size at the end of the region. */ - ptr3 = mmap(&ptr_region[80 * page_size], 20 * page_size, - PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ptr3 = mmap_(self, variant, &ptr_region[80 * page_size], 20 * page_size, + PROT_READ | PROT_WRITE, MAP_FIXED, 0); ASSERT_NE(ptr3, MAP_FAILED); /* Unmap gaps. */ @@ -320,13 +488,11 @@ TEST_F(guard_pages, multi_vma) } /* Now map incompatible VMAs in the gaps. 
*/ - ptr = mmap(&ptr_region[10 * page_size], 40 * page_size, - PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, &ptr_region[10 * page_size], 40 * page_size, + PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED, 0); ASSERT_NE(ptr, MAP_FAILED); - ptr = mmap(&ptr_region[55 * page_size], 25 * page_size, - PROT_READ | PROT_WRITE | PROT_EXEC, - MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, &ptr_region[55 * page_size], 25 * page_size, + PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED, 0); ASSERT_NE(ptr, MAP_FAILED); /* @@ -361,20 +527,16 @@ TEST_F(guard_pages, multi_vma) * Assert that batched operations performed using process_madvise() work as * expected. */ -TEST_F(guard_pages, process_madvise) +TEST_F(guard_regions, process_madvise) { const unsigned long page_size = self->page_size; - pid_t pid = getpid(); - int pidfd = pidfd_open(pid, 0); char *ptr_region, *ptr1, *ptr2, *ptr3; ssize_t count; struct iovec vec[6]; - ASSERT_NE(pidfd, -1); - /* Reserve region to map over. */ - ptr_region = mmap(NULL, 100 * page_size, PROT_NONE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr_region = mmap_(self, variant, NULL, 100 * page_size, + PROT_NONE, 0, 0); ASSERT_NE(ptr_region, MAP_FAILED); /* @@ -382,9 +544,8 @@ TEST_F(guard_pages, process_madvise) * overwrite existing entries and test this code path against * overwriting existing entries. */ - ptr1 = mmap(&ptr_region[page_size], 10 * page_size, - PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANON | MAP_PRIVATE | MAP_POPULATE, -1, 0); + ptr1 = mmap_(self, variant, &ptr_region[page_size], 10 * page_size, + PROT_READ | PROT_WRITE, MAP_FIXED | MAP_POPULATE, 0); ASSERT_NE(ptr1, MAP_FAILED); /* We want guard markers at start/end of each VMA. */ vec[0].iov_base = ptr1; @@ -393,9 +554,8 @@ TEST_F(guard_pages, process_madvise) vec[1].iov_len = page_size; /* 5 pages offset 50 pages into reserve region. */ - ptr2 = mmap(&ptr_region[50 * page_size], 5 * page_size, - PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ptr2 = mmap_(self, variant, &ptr_region[50 * page_size], 5 * page_size, + PROT_READ | PROT_WRITE, MAP_FIXED, 0); ASSERT_NE(ptr2, MAP_FAILED); vec[2].iov_base = ptr2; vec[2].iov_len = page_size; @@ -403,9 +563,8 @@ TEST_F(guard_pages, process_madvise) vec[3].iov_len = page_size; /* 20 pages offset 79 pages into reserve region. */ - ptr3 = mmap(&ptr_region[79 * page_size], 20 * page_size, - PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ptr3 = mmap_(self, variant, &ptr_region[79 * page_size], 20 * page_size, + PROT_READ | PROT_WRITE, MAP_FIXED, 0); ASSERT_NE(ptr3, MAP_FAILED); vec[4].iov_base = ptr3; vec[4].iov_len = page_size; @@ -419,7 +578,7 @@ TEST_F(guard_pages, process_madvise) ASSERT_EQ(munmap(&ptr_region[99 * page_size], page_size), 0); /* Now guard in one step. */ - count = process_madvise(pidfd, vec, 6, MADV_GUARD_INSTALL, 0); + count = sys_process_madvise(PIDFD_SELF, vec, 6, MADV_GUARD_INSTALL, 0); /* OK we don't have permission to do this, skip. */ if (count == -1 && errno == EPERM) @@ -440,7 +599,7 @@ TEST_F(guard_pages, process_madvise) ASSERT_FALSE(try_read_write_buf(&ptr3[19 * page_size])); /* Now do the same with unguard... */ - count = process_madvise(pidfd, vec, 6, MADV_GUARD_REMOVE, 0); + count = sys_process_madvise(PIDFD_SELF, vec, 6, MADV_GUARD_REMOVE, 0); /* ...and everything should now succeed. 
*/ @@ -457,17 +616,16 @@ TEST_F(guard_pages, process_madvise) ASSERT_EQ(munmap(ptr1, 10 * page_size), 0); ASSERT_EQ(munmap(ptr2, 5 * page_size), 0); ASSERT_EQ(munmap(ptr3, 20 * page_size), 0); - close(pidfd); } /* Assert that unmapping ranges does not leave guard markers behind. */ -TEST_F(guard_pages, munmap) +TEST_F(guard_regions, munmap) { const unsigned long page_size = self->page_size; char *ptr, *ptr_new1, *ptr_new2; - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Guard first and last pages. */ @@ -483,11 +641,11 @@ TEST_F(guard_pages, munmap) ASSERT_EQ(munmap(&ptr[9 * page_size], page_size), 0); /* Map over them.*/ - ptr_new1 = mmap(ptr, page_size, PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ptr_new1 = mmap_(self, variant, ptr, page_size, PROT_READ | PROT_WRITE, + MAP_FIXED, 0); ASSERT_NE(ptr_new1, MAP_FAILED); - ptr_new2 = mmap(&ptr[9 * page_size], page_size, PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ptr_new2 = mmap_(self, variant, &ptr[9 * page_size], page_size, + PROT_READ | PROT_WRITE, MAP_FIXED, 0); ASSERT_NE(ptr_new2, MAP_FAILED); /* Assert that they are now not guarded. */ @@ -499,14 +657,14 @@ TEST_F(guard_pages, munmap) } /* Assert that mprotect() operations have no bearing on guard markers. */ -TEST_F(guard_pages, mprotect) +TEST_F(guard_regions, mprotect) { const unsigned long page_size = self->page_size; char *ptr; int i; - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Guard the middle of the range. */ @@ -547,14 +705,14 @@ TEST_F(guard_pages, mprotect) } /* Split and merge VMAs and make sure guard pages still behave. */ -TEST_F(guard_pages, split_merge) +TEST_F(guard_regions, split_merge) { const unsigned long page_size = self->page_size; char *ptr, *ptr_new; int i; - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Guard the whole range. */ @@ -595,14 +753,14 @@ TEST_F(guard_pages, split_merge) } /* Now map them again - the unmap will have cleared the guards. */ - ptr_new = mmap(&ptr[2 * page_size], page_size, PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ptr_new = mmap_(self, variant, &ptr[2 * page_size], page_size, + PROT_READ | PROT_WRITE, MAP_FIXED, 0); ASSERT_NE(ptr_new, MAP_FAILED); - ptr_new = mmap(&ptr[5 * page_size], page_size, PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ptr_new = mmap_(self, variant, &ptr[5 * page_size], page_size, + PROT_READ | PROT_WRITE, MAP_FIXED, 0); ASSERT_NE(ptr_new, MAP_FAILED); - ptr_new = mmap(&ptr[8 * page_size], page_size, PROT_READ | PROT_WRITE, - MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ptr_new = mmap_(self, variant, &ptr[8 * page_size], page_size, + PROT_READ | PROT_WRITE, MAP_FIXED, 0); ASSERT_NE(ptr_new, MAP_FAILED); /* Now make sure guard pages are established. */ @@ -678,14 +836,14 @@ TEST_F(guard_pages, split_merge) } /* Assert that MADV_DONTNEED does not remove guard markers. 
*/ -TEST_F(guard_pages, dontneed) +TEST_F(guard_regions, dontneed) { const unsigned long page_size = self->page_size; char *ptr; int i; - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Back the whole range. */ @@ -715,8 +873,16 @@ TEST_F(guard_pages, dontneed) ASSERT_FALSE(result); } else { ASSERT_TRUE(result); - /* Make sure we really did get reset to zero page. */ - ASSERT_EQ(*curr, '\0'); + switch (variant->backing) { + case ANON_BACKED: + /* If anon, then we get a zero page. */ + ASSERT_EQ(*curr, '\0'); + break; + default: + /* Otherwise, we get the file data. */ + ASSERT_EQ(*curr, 'y'); + break; + } } /* Now write... */ @@ -731,14 +897,14 @@ TEST_F(guard_pages, dontneed) } /* Assert that mlock()'ed pages work correctly with guard markers. */ -TEST_F(guard_pages, mlock) +TEST_F(guard_regions, mlock) { const unsigned long page_size = self->page_size; char *ptr; int i; - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Populate. */ @@ -804,14 +970,14 @@ TEST_F(guard_pages, mlock) * * - Moving a mapping alone should retain markers as they are. */ -TEST_F(guard_pages, mremap_move) +TEST_F(guard_regions, mremap_move) { const unsigned long page_size = self->page_size; char *ptr, *ptr_new; /* Map 5 pages. */ - ptr = mmap(NULL, 5 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 5 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Place guard markers at both ends of the 5 page span. */ @@ -825,8 +991,7 @@ TEST_F(guard_pages, mremap_move) /* Map a new region we will move this range into. Doing this ensures * that we have reserved a range to map into. */ - ptr_new = mmap(NULL, 5 * page_size, PROT_NONE, MAP_ANON | MAP_PRIVATE, - -1, 0); + ptr_new = mmap_(self, variant, NULL, 5 * page_size, PROT_NONE, 0, 0); ASSERT_NE(ptr_new, MAP_FAILED); ASSERT_EQ(mremap(ptr, 5 * page_size, 5 * page_size, @@ -851,14 +1016,14 @@ TEST_F(guard_pages, mremap_move) * will have to remove guard pages manually to fix up (they'd have to do the * same if it were a PROT_NONE mapping). */ -TEST_F(guard_pages, mremap_expand) +TEST_F(guard_regions, mremap_expand) { const unsigned long page_size = self->page_size; char *ptr, *ptr_new; /* Map 10 pages... */ - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* ...But unmap the last 5 so we can ensure we can expand into them. */ ASSERT_EQ(munmap(&ptr[5 * page_size], 5 * page_size), 0); @@ -882,8 +1047,7 @@ TEST_F(guard_pages, mremap_expand) ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size])); /* Reserve a region which we can move to and expand into. */ - ptr_new = mmap(NULL, 20 * page_size, PROT_NONE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr_new = mmap_(self, variant, NULL, 20 * page_size, PROT_NONE, 0, 0); ASSERT_NE(ptr_new, MAP_FAILED); /* Now move and expand into it. */ @@ -914,15 +1078,15 @@ TEST_F(guard_pages, mremap_expand) * if the user were using a PROT_NONE mapping they'd have to manually fix this * up also so this is OK. 
*/ -TEST_F(guard_pages, mremap_shrink) +TEST_F(guard_regions, mremap_shrink) { const unsigned long page_size = self->page_size; char *ptr; int i; /* Map 5 pages. */ - ptr = mmap(NULL, 5 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 5 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Place guard markers at both ends of the 5 page span. */ @@ -978,7 +1142,7 @@ TEST_F(guard_pages, mremap_shrink) * Assert that forking a process with VMAs that do not have VM_WIPEONFORK set * retain guard pages. */ -TEST_F(guard_pages, fork) +TEST_F(guard_regions, fork) { const unsigned long page_size = self->page_size; char *ptr; @@ -986,11 +1150,11 @@ TEST_F(guard_pages, fork) int i; /* Map 10 pages. */ - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); - /* Establish guard apges in the first 5 pages. */ + /* Establish guard pages in the first 5 pages. */ ASSERT_EQ(madvise(ptr, 5 * page_size, MADV_GUARD_INSTALL), 0); pid = fork(); @@ -1030,19 +1194,96 @@ TEST_F(guard_pages, fork) } /* + * Assert expected behaviour after we fork populated ranges of anonymous memory + * and then guard and unguard the range. + */ +TEST_F(guard_regions, fork_cow) +{ + const unsigned long page_size = self->page_size; + char *ptr; + pid_t pid; + int i; + + if (variant->backing != ANON_BACKED) + SKIP(return, "CoW only supported on anon mappings"); + + /* Map 10 pages. */ + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Populate range. */ + for (i = 0; i < 10 * page_size; i++) { + char chr = 'a' + (i % 26); + + ptr[i] = chr; + } + + pid = fork(); + ASSERT_NE(pid, -1); + if (!pid) { + /* This is the child process now. */ + + /* Ensure the range is as expected. */ + for (i = 0; i < 10 * page_size; i++) { + char expected = 'a' + (i % 26); + char actual = ptr[i]; + + ASSERT_EQ(actual, expected); + } + + /* Establish guard pages across the whole range. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0); + /* Remove it. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* + * By removing the guard pages, the page tables will be + * cleared. Assert that we are looking at the zero page now. + */ + for (i = 0; i < 10 * page_size; i++) { + char actual = ptr[i]; + + ASSERT_EQ(actual, '\0'); + } + + exit(0); + } + + /* Parent process. */ + + /* Parent simply waits on child. */ + waitpid(pid, NULL, 0); + + /* Ensure the range is unchanged in parent anon range. */ + for (i = 0; i < 10 * page_size; i++) { + char expected = 'a' + (i % 26); + char actual = ptr[i]; + + ASSERT_EQ(actual, expected); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* * Assert that forking a process with VMAs that do have VM_WIPEONFORK set * behave as expected. */ -TEST_F(guard_pages, fork_wipeonfork) +TEST_F(guard_regions, fork_wipeonfork) { const unsigned long page_size = self->page_size; char *ptr; pid_t pid; int i; + if (variant->backing != ANON_BACKED) + SKIP(return, "Wipe on fork only supported on anon mappings"); + /* Map 10 pages. */ - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Mark wipe on fork. 
*/ @@ -1083,15 +1324,18 @@ TEST_F(guard_pages, fork_wipeonfork) } /* Ensure that MADV_FREE retains guard entries as expected. */ -TEST_F(guard_pages, lazyfree) +TEST_F(guard_regions, lazyfree) { const unsigned long page_size = self->page_size; char *ptr; int i; + if (variant->backing != ANON_BACKED) + SKIP(return, "MADV_FREE only supported on anon mappings"); + /* Map 10 pages. */ - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Guard range. */ @@ -1119,14 +1363,14 @@ TEST_F(guard_pages, lazyfree) } /* Ensure that MADV_POPULATE_READ, MADV_POPULATE_WRITE behave as expected. */ -TEST_F(guard_pages, populate) +TEST_F(guard_regions, populate) { const unsigned long page_size = self->page_size; char *ptr; /* Map 10 pages. */ - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Guard range. */ @@ -1145,15 +1389,15 @@ TEST_F(guard_pages, populate) } /* Ensure that MADV_COLD, MADV_PAGEOUT do not remove guard markers. */ -TEST_F(guard_pages, cold_pageout) +TEST_F(guard_regions, cold_pageout) { const unsigned long page_size = self->page_size; char *ptr; int i; /* Map 10 pages. */ - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Guard range. */ @@ -1191,7 +1435,7 @@ TEST_F(guard_pages, cold_pageout) } /* Ensure that guard pages do not break userfaultd. */ -TEST_F(guard_pages, uffd) +TEST_F(guard_regions, uffd) { const unsigned long page_size = self->page_size; int uffd; @@ -1204,17 +1448,33 @@ TEST_F(guard_pages, uffd) struct uffdio_register reg; struct uffdio_range range; + if (!is_anon_backed(variant)) + SKIP(return, "uffd only works on anon backing"); + /* Set up uffd. */ uffd = userfaultfd(0); - if (uffd == -1 && errno == EPERM) - ksft_exit_skip("No userfaultfd permissions, try running as root.\n"); + if (uffd == -1) { + switch (errno) { + case EPERM: + SKIP(return, "No userfaultfd permissions, try running as root."); + break; + case ENOSYS: + SKIP(return, "userfaultfd is not supported/not enabled."); + break; + default: + ksft_exit_fail_msg("userfaultfd failed with %s\n", + strerror(errno)); + break; + } + } + ASSERT_NE(uffd, -1); ASSERT_EQ(ioctl(uffd, UFFDIO_API, &api), 0); /* Map 10 pages. */ - ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, - MAP_ANON | MAP_PRIVATE, -1, 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); ASSERT_NE(ptr, MAP_FAILED); /* Register the range with uffd. */ @@ -1240,4 +1500,649 @@ TEST_F(guard_pages, uffd) ASSERT_EQ(munmap(ptr, 10 * page_size), 0); } +/* + * Mark a region within a file-backed mapping using MADV_SEQUENTIAL so we + * aggressively read-ahead, then install guard regions and assert that it + * behaves correctly. + * + * We page out using MADV_PAGEOUT before checking guard regions so we drop page + * cache folios, meaning we maximise the possibility of some broken readahead. 
+ */ +TEST_F(guard_regions, madvise_sequential) +{ + char *ptr; + int i; + const unsigned long page_size = self->page_size; + + if (variant->backing == ANON_BACKED) + SKIP(return, "MADV_SEQUENTIAL meaningful only for file-backed"); + + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Establish a pattern of data in the file. */ + set_pattern(ptr, 10, page_size); + ASSERT_TRUE(check_pattern(ptr, 10, page_size)); + + /* Mark it as being accessed sequentially. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_SEQUENTIAL), 0); + + /* Mark every other page a guard page. */ + for (i = 0; i < 10; i += 2) { + char *ptr2 = &ptr[i * page_size]; + + ASSERT_EQ(madvise(ptr2, page_size, MADV_GUARD_INSTALL), 0); + } + + /* Now page it out. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_PAGEOUT), 0); + + /* Now make sure pages are as expected. */ + for (i = 0; i < 10; i++) { + char *chrp = &ptr[i * page_size]; + + if (i % 2 == 0) { + bool result = try_read_write_buf(chrp); + + ASSERT_FALSE(result); + } else { + ASSERT_EQ(*chrp, 'a' + i); + } + } + + /* Now remove guard pages. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* Now make sure all data is as expected. */ + if (!check_pattern(ptr, 10, page_size)) + ASSERT_TRUE(false); + + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* + * Check that file-backed mappings implement guard regions with MAP_PRIVATE + * correctly. + */ +TEST_F(guard_regions, map_private) +{ + const unsigned long page_size = self->page_size; + char *ptr_shared, *ptr_private; + int i; + + if (variant->backing == ANON_BACKED) + SKIP(return, "MAP_PRIVATE test specific to file-backed"); + + ptr_shared = mmap_(self, variant, NULL, 10 * page_size, PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr_shared, MAP_FAILED); + + /* Manually mmap(), do not use mmap_() wrapper so we can force MAP_PRIVATE. */ + ptr_private = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, self->fd, 0); + ASSERT_NE(ptr_private, MAP_FAILED); + + /* Set pattern in shared mapping. */ + set_pattern(ptr_shared, 10, page_size); + + /* Install guard regions in every other page in the shared mapping. */ + for (i = 0; i < 10; i += 2) { + char *ptr = &ptr_shared[i * page_size]; + + ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0); + } + + for (i = 0; i < 10; i++) { + /* Every even shared page should be guarded. */ + ASSERT_EQ(try_read_buf(&ptr_shared[i * page_size]), i % 2 != 0); + /* Private mappings should always be readable. */ + ASSERT_TRUE(try_read_buf(&ptr_private[i * page_size])); + } + + /* Install guard regions in every other page in the private mapping. */ + for (i = 0; i < 10; i += 2) { + char *ptr = &ptr_private[i * page_size]; + + ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0); + } + + for (i = 0; i < 10; i++) { + /* Every even shared page should be guarded. */ + ASSERT_EQ(try_read_buf(&ptr_shared[i * page_size]), i % 2 != 0); + /* Every odd private page should be guarded. */ + ASSERT_EQ(try_read_buf(&ptr_private[i * page_size]), i % 2 != 0); + } + + /* Remove guard regions from shared mapping. */ + ASSERT_EQ(madvise(ptr_shared, 10 * page_size, MADV_GUARD_REMOVE), 0); + + for (i = 0; i < 10; i++) { + /* Shared mappings should always be readable. */ + ASSERT_TRUE(try_read_buf(&ptr_shared[i * page_size])); + /* Every even private page should be guarded. */ + ASSERT_EQ(try_read_buf(&ptr_private[i * page_size]), i % 2 != 0); + } + + /* Remove guard regions from private mapping. 
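
The shared/private interplay tested here follows from ordinary CoW semantics: a MAP_PRIVATE page reads through to the file until first write, and zapping a private page that was never CoWed (or whose guard was just removed) falls back to the file contents on the next fault. Stripped to the basics:

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	int fd = memfd_create("cow-demo", 0);
	char *shared, *private;

	if (fd < 0 || ftruncate(fd, page))
		return 1;

	shared = mmap(NULL, page, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	private = mmap(NULL, page, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
	if (shared == MAP_FAILED || private == MAP_FAILED)
		return 1;

	strcpy(shared, "file contents");
	/* Before CoW, the private mapping reads through to the file... */
	printf("private sees: %s\n", private);

	/* ...but a write detaches it onto a private anonymous copy... */
	strcpy(private, "private copy");
	strcpy(shared, "new file contents");

	/* ...so later file updates no longer show through. */
	printf("private now: %s, shared now: %s\n", private, shared);
	return 0;
}
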
*/ + ASSERT_EQ(madvise(ptr_private, 10 * page_size, MADV_GUARD_REMOVE), 0); + + for (i = 0; i < 10; i++) { + /* Shared mappings should always be readable. */ + ASSERT_TRUE(try_read_buf(&ptr_shared[i * page_size])); + /* Private mappings should always be readable. */ + ASSERT_TRUE(try_read_buf(&ptr_private[i * page_size])); + } + + /* Ensure patterns are intact. */ + ASSERT_TRUE(check_pattern(ptr_shared, 10, page_size)); + ASSERT_TRUE(check_pattern(ptr_private, 10, page_size)); + + /* Now write out every other page to MAP_PRIVATE. */ + for (i = 0; i < 10; i += 2) { + char *ptr = &ptr_private[i * page_size]; + + memset(ptr, 'a' + i, page_size); + } + + /* + * At this point the mapping is: + * + * 0123456789 + * SPSPSPSPSP + * + * Where S = shared, P = private mappings. + */ + + /* Now mark the beginning of the mapping guarded. */ + ASSERT_EQ(madvise(ptr_private, 5 * page_size, MADV_GUARD_INSTALL), 0); + + /* + * This renders the mapping: + * + * 0123456789 + * xxxxxPSPSP + */ + + for (i = 0; i < 10; i++) { + char *ptr = &ptr_private[i * page_size]; + + /* Ensure guard regions as expected. */ + ASSERT_EQ(try_read_buf(ptr), i >= 5); + /* The shared mapping should always succeed. */ + ASSERT_TRUE(try_read_buf(&ptr_shared[i * page_size])); + } + + /* Remove the guard regions altogether. */ + ASSERT_EQ(madvise(ptr_private, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* + * + * We now expect the mapping to be: + * + * 0123456789 + * SSSSSPSPSP + * + * As we removed guard regions, the private pages from the first 5 will + * have been zapped, so on fault will reestablish the shared mapping. + */ + + for (i = 0; i < 10; i++) { + char *ptr = &ptr_private[i * page_size]; + + /* + * Assert that shared mappings in the MAP_PRIVATE mapping match + * the shared mapping. + */ + if (i < 5 || i % 2 == 0) { + char *ptr_s = &ptr_shared[i * page_size]; + + ASSERT_EQ(memcmp(ptr, ptr_s, page_size), 0); + continue; + } + + /* Everything else is a private mapping. */ + ASSERT_TRUE(is_buf_eq(ptr, page_size, 'a' + i)); + } + + ASSERT_EQ(munmap(ptr_shared, 10 * page_size), 0); + ASSERT_EQ(munmap(ptr_private, 10 * page_size), 0); +} + +/* Test that guard regions established over a read-only mapping function correctly. */ +TEST_F(guard_regions, readonly_file) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + if (variant->backing != LOCAL_FILE_BACKED) + SKIP(return, "Read-only test specific to file-backed"); + + /* Map shared so we can populate with pattern, populate it, unmap. */ + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + set_pattern(ptr, 10, page_size); + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); + /* Close the fd so we can re-open read-only. */ + ASSERT_EQ(close(self->fd), 0); + + /* Re-open read-only. */ + self->fd = open(self->path, O_RDONLY); + ASSERT_NE(self->fd, -1); + /* Re-map read-only. */ + ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Mark every other page guarded. */ + for (i = 0; i < 10; i += 2) { + char *ptr_pg = &ptr[i * page_size]; + + ASSERT_EQ(madvise(ptr_pg, page_size, MADV_GUARD_INSTALL), 0); + } + + /* Assert that the guard regions are in place.*/ + for (i = 0; i < 10; i++) { + char *ptr_pg = &ptr[i * page_size]; + + ASSERT_EQ(try_read_buf(ptr_pg), i % 2 != 0); + } + + /* Remove guard regions. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* Ensure the data is as expected. 
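
set_pattern() and check_pattern() are fixture helpers defined earlier in the file and not visible in this hunk; judging from the call sites, where page i is asserted to hold 'a' + i, a plausible shape is:

#include <stdbool.h>
#include <string.h>

/* Fill each of the first nr_pages pages with a distinct byte: 'a' + index. */
static void set_pattern(char *ptr, unsigned long nr_pages,
			unsigned long page_size)
{
	unsigned long i;

	for (i = 0; i < nr_pages; i++)
		memset(&ptr[i * page_size], 'a' + (i % 26), page_size);
}

/* Verify the pattern set_pattern() established. */
static bool check_pattern(char *ptr, unsigned long nr_pages,
			  unsigned long page_size)
{
	unsigned long i, j;

	for (i = 0; i < nr_pages; i++)
		for (j = 0; j < page_size; j++)
			if (ptr[i * page_size + j] != 'a' + (char)(i % 26))
				return false;
	return true;
}
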
*/ + ASSERT_TRUE(check_pattern(ptr, 10, page_size)); + + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +TEST_F(guard_regions, fault_around) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + if (variant->backing == ANON_BACKED) + SKIP(return, "Fault-around test specific to file-backed"); + + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Establish a pattern in the backing file. */ + set_pattern(ptr, 10, page_size); + + /* + * Now drop it from the page cache so we get major faults when next we + * map it. + */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_PAGEOUT), 0); + + /* Unmap and remap 'to be sure'. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Now make every even page guarded. */ + for (i = 0; i < 10; i += 2) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(madvise(ptr_p, page_size, MADV_GUARD_INSTALL), 0); + } + + /* Now fault in every odd page. This should trigger fault-around. */ + for (i = 1; i < 10; i += 2) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_buf(ptr_p)); + } + + /* Finally, ensure that guard regions are intact as expected. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(try_read_buf(ptr_p), i % 2 != 0); + } + + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +TEST_F(guard_regions, truncation) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + if (variant->backing == ANON_BACKED) + SKIP(return, "Truncation test specific to file-backed"); + + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* + * Establish a pattern in the backing file, just so there is data + * there. + */ + set_pattern(ptr, 10, page_size); + + /* Now make every even page guarded. */ + for (i = 0; i < 10; i += 2) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(madvise(ptr_p, page_size, MADV_GUARD_INSTALL), 0); + } + + /* Now assert things are as expected. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(try_read_write_buf(ptr_p), i % 2 != 0); + } + + /* Now truncate to actually used size (initialised to 100). */ + ASSERT_EQ(ftruncate(self->fd, 10 * page_size), 0); + + /* Here the guard regions will remain intact. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(try_read_write_buf(ptr_p), i % 2 != 0); + } + + /* Now truncate to half the size, then truncate again to the full size. */ + ASSERT_EQ(ftruncate(self->fd, 5 * page_size), 0); + ASSERT_EQ(ftruncate(self->fd, 10 * page_size), 0); + + /* Again, guard pages will remain intact. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(try_read_write_buf(ptr_p), i % 2 != 0); + } + + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +TEST_F(guard_regions, hole_punch) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + if (variant->backing == ANON_BACKED) + SKIP(return, "Truncation test specific to file-backed"); + + /* Establish pattern in mapping. */ + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + set_pattern(ptr, 10, page_size); + + /* Install a guard region in the middle of the mapping. 
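
The hole punch performed below uses MADV_REMOVE, which madvise(2) documents as the mmap-side equivalent of fallocate(FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE) on a shared file mapping; afterwards the punched range reads back as zeroes. In isolation:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	int fd = memfd_create("punch-demo", 0);
	char *p;

	if (fd < 0 || ftruncate(fd, 3 * page))
		return 1;
	p = mmap(NULL, 3 * page, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		return 1;
	memset(p, 'x', 3 * page);

	/* Punch out the middle page; the two calls below are equivalent. */
	if (madvise(p + page, page, MADV_REMOVE))
		fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
			  page, page);

	printf("middle byte after punch: %d\n", p[page]); /* prints 0 */
	return 0;
}
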
*/ + ASSERT_EQ(madvise(&ptr[3 * page_size], 4 * page_size, + MADV_GUARD_INSTALL), 0); + + /* + * The buffer will now be: + * + * 0123456789 + * ***xxxx*** + * + * Where * is data and x is the guard region. + */ + + /* Ensure established. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(try_read_buf(ptr_p), i < 3 || i >= 7); + } + + /* Now hole punch the guarded region. */ + ASSERT_EQ(madvise(&ptr[3 * page_size], 4 * page_size, + MADV_REMOVE), 0); + + /* Ensure guard regions remain. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(try_read_buf(ptr_p), i < 3 || i >= 7); + } + + /* Now remove guard region throughout. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* Check that the pattern exists in non-hole punched region. */ + ASSERT_TRUE(check_pattern(ptr, 3, page_size)); + /* Check that hole punched region is zeroed. */ + ASSERT_TRUE(is_buf_eq(&ptr[3 * page_size], 4 * page_size, '\0')); + /* Check that the pattern exists in the remainder of the file. */ + ASSERT_TRUE(check_pattern_offset(ptr, 3, page_size, 7)); + + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* + * Ensure that a memfd works correctly with guard regions, that we can write + * seal it then open the mapping read-only and still establish guard regions + * within, remove those guard regions and have everything work correctly. + */ +TEST_F(guard_regions, memfd_write_seal) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + if (variant->backing != SHMEM_BACKED) + SKIP(return, "memfd write seal test specific to shmem"); + + /* OK, we need a memfd, so close existing one. */ + ASSERT_EQ(close(self->fd), 0); + + /* Create and truncate memfd. */ + self->fd = memfd_create("guard_regions_memfd_seals_test", + MFD_ALLOW_SEALING); + ASSERT_NE(self->fd, -1); + ASSERT_EQ(ftruncate(self->fd, 10 * page_size), 0); + + /* Map, set pattern, unmap. */ + ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + set_pattern(ptr, 10, page_size); + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); + + /* Write-seal the memfd. */ + ASSERT_EQ(fcntl(self->fd, F_ADD_SEALS, F_SEAL_WRITE), 0); + + /* Now map the memfd readonly. */ + ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Ensure pattern is as expected. */ + ASSERT_TRUE(check_pattern(ptr, 10, page_size)); + + /* Now make every even page guarded. */ + for (i = 0; i < 10; i += 2) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(madvise(ptr_p, page_size, MADV_GUARD_INSTALL), 0); + } + + /* Now assert things are as expected. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(try_read_buf(ptr_p), i % 2 != 0); + } + + /* Now remove guard regions. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* Ensure pattern is as expected. */ + ASSERT_TRUE(check_pattern(ptr, 10, page_size)); + + /* Ensure write seal intact. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_FALSE(try_write_buf(ptr_p)); + } + + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + + +/* + * Since we are now permitted to establish guard regions in read-only anonymous + * mappings, for the sake of thoroughness, though it probably has no practical + * use, test that guard regions function with a mapping to the anonymous zero + * page. 
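
F_SEAL_WRITE is what makes the read-only remap in the write-seal test above trustworthy: once the seal is set, even a fresh PROT_WRITE/MAP_SHARED mapping of the memfd is refused. A minimal demonstration:

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	int fd = memfd_create("seal-demo", MFD_ALLOW_SEALING);
	void *p;

	if (fd < 0 || ftruncate(fd, page))
		return 1;

	if (fcntl(fd, F_ADD_SEALS, F_SEAL_WRITE))
		return 1;

	/* Writable shared mappings are now rejected outright... */
	p = mmap(NULL, page, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	printf("PROT_WRITE mmap: %s\n",
	       p == MAP_FAILED ? strerror(errno) : "unexpectedly succeeded");

	/* ...while read-only mappings still work. */
	p = mmap(NULL, page, PROT_READ, MAP_SHARED, fd, 0);
	printf("PROT_READ mmap: %s\n",
	       p == MAP_FAILED ? strerror(errno) : "ok");
	return 0;
}
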
+ */ +TEST_F(guard_regions, anon_zeropage) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + if (!is_anon_backed(variant)) + SKIP(return, "anon zero page test specific to anon/shmem"); + + /* Obtain a read-only i.e. anon zero page mapping. */ + ptr = mmap_(self, variant, NULL, 10 * page_size, PROT_READ, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Now make every even page guarded. */ + for (i = 0; i < 10; i += 2) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(madvise(ptr_p, page_size, MADV_GUARD_INSTALL), 0); + } + + /* Now assert things are as expected. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(try_read_buf(ptr_p), i % 2 != 0); + } + + /* Now remove all guard regions. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* Now assert things are as expected. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_buf(ptr_p)); + } + + /* Ensure zero page...*/ + ASSERT_TRUE(is_buf_eq(ptr, 10 * page_size, '\0')); + + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* + * Assert that /proc/$pid/pagemap correctly identifies guard region ranges. + */ +TEST_F(guard_regions, pagemap) +{ + const unsigned long page_size = self->page_size; + int proc_fd; + char *ptr; + int i; + + proc_fd = open("/proc/self/pagemap", O_RDONLY); + ASSERT_NE(proc_fd, -1); + + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Read from pagemap, and assert no guard regions are detected. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + unsigned long entry = pagemap_get_entry(proc_fd, ptr_p); + unsigned long masked = entry & PM_GUARD_REGION; + + ASSERT_EQ(masked, 0); + } + + /* Install a guard region in every other page. */ + for (i = 0; i < 10; i += 2) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(madvise(ptr_p, page_size, MADV_GUARD_INSTALL), 0); + } + + /* Re-read from pagemap, and assert guard regions are detected. */ + for (i = 0; i < 10; i++) { + char *ptr_p = &ptr[i * page_size]; + unsigned long entry = pagemap_get_entry(proc_fd, ptr_p); + unsigned long masked = entry & PM_GUARD_REGION; + + ASSERT_EQ(masked, i % 2 == 0 ? PM_GUARD_REGION : 0); + } + + ASSERT_EQ(close(proc_fd), 0); + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* + * Assert that PAGEMAP_SCAN correctly reports guard region ranges. + */ +TEST_F(guard_regions, pagemap_scan) +{ + const unsigned long page_size = self->page_size; + struct page_region pm_regs[10]; + struct pm_scan_arg pm_scan_args = { + .size = sizeof(struct pm_scan_arg), + .category_anyof_mask = PAGE_IS_GUARD, + .return_mask = PAGE_IS_GUARD, + .vec = (long)&pm_regs, + .vec_len = ARRAY_SIZE(pm_regs), + }; + int proc_fd, i; + char *ptr; + + proc_fd = open("/proc/self/pagemap", O_RDONLY); + ASSERT_NE(proc_fd, -1); + + ptr = mmap_(self, variant, NULL, 10 * page_size, + PROT_READ | PROT_WRITE, 0, 0); + ASSERT_NE(ptr, MAP_FAILED); + + pm_scan_args.start = (long)ptr; + pm_scan_args.end = (long)ptr + 10 * page_size; + ASSERT_EQ(ioctl(proc_fd, PAGEMAP_SCAN, &pm_scan_args), 0); + ASSERT_EQ(pm_scan_args.walk_end, (long)ptr + 10 * page_size); + + /* Install a guard region in every other page. 
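
pagemap_get_entry() comes from vm_util.c: /proc/<pid>/pagemap exposes one __u64 per virtual page, so the lookup is a single pread(). A sketch under that assumption; PM_GUARD_REGION is taken here as bit 58, which is what the pagemap documentation assigns to guard regions, but verify against your uapi headers:

#include <fcntl.h>
#include <stdint.h>
#include <unistd.h>

#define PM_GUARD_REGION (1ULL << 58)	/* assumed: see pagemap.rst */

static uint64_t pagemap_get_entry(int pagemap_fd, void *addr)
{
	long page = sysconf(_SC_PAGESIZE);
	uint64_t entry = 0;
	/* One 64-bit entry per virtual page, indexed by page number. */
	off_t off = ((uintptr_t)addr / page) * sizeof(entry);

	if (pread(pagemap_fd, &entry, sizeof(entry), off) != sizeof(entry))
		return 0;
	return entry;
}
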
*/ + for (i = 0; i < 10; i += 2) { + char *ptr_p = &ptr[i * page_size]; + + ASSERT_EQ(syscall(__NR_madvise, ptr_p, page_size, MADV_GUARD_INSTALL), 0); + } + + /* + * Assert ioctl() returns the count of located regions, where each + * region spans every other page within the range of 10 pages. + */ + ASSERT_EQ(ioctl(proc_fd, PAGEMAP_SCAN, &pm_scan_args), 5); + ASSERT_EQ(pm_scan_args.walk_end, (long)ptr + 10 * page_size); + + /* Re-read from pagemap, and assert guard regions are detected. */ + for (i = 0; i < 5; i++) { + long ptr_p = (long)&ptr[2 * i * page_size]; + + ASSERT_EQ(pm_regs[i].start, ptr_p); + ASSERT_EQ(pm_regs[i].end, ptr_p + page_size); + ASSERT_EQ(pm_regs[i].categories, PAGE_IS_GUARD); + } + + ASSERT_EQ(close(proc_fd), 0); + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c index 9423ad439a61..29047d2e0c49 100644 --- a/tools/testing/selftests/mm/gup_longterm.c +++ b/tools/testing/selftests/mm/gup_longterm.c @@ -93,29 +93,48 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) __fsword_t fs_type = get_fs_type(fd); bool should_work; char *mem; + int result = KSFT_PASS; int ret; + if (fd < 0) { + result = KSFT_FAIL; + goto report; + } + if (ftruncate(fd, size)) { - ksft_test_result_fail("ftruncate() failed\n"); + if (errno == ENOENT) { + skip_test_dodgy_fs("ftruncate()"); + } else { + ksft_print_msg("ftruncate() failed (%s)\n", + strerror(errno)); + result = KSFT_FAIL; + goto report; + } return; } if (fallocate(fd, 0, 0, size)) { - if (size == pagesize) - ksft_test_result_fail("fallocate() failed\n"); - else - ksft_test_result_skip("need more free huge pages\n"); - return; + if (size == pagesize) { + ksft_print_msg("fallocate() failed (%s)\n", strerror(errno)); + result = KSFT_FAIL; + } else { + ksft_print_msg("need more free huge pages\n"); + result = KSFT_SKIP; + } + goto report; } mem = mmap(NULL, size, PROT_READ | PROT_WRITE, shared ? MAP_SHARED : MAP_PRIVATE, fd, 0); if (mem == MAP_FAILED) { - if (size == pagesize || shared) - ksft_test_result_fail("mmap() failed\n"); - else - ksft_test_result_skip("need more free huge pages\n"); - return; + if (size == pagesize || shared) { + ksft_print_msg("mmap() failed (%s)\n", strerror(errno)); + result = KSFT_FAIL; + } else { + ksft_print_msg("need more free huge pages\n"); + result = KSFT_SKIP; + } + goto report; } /* Fault in the page such that GUP-fast can pin it directly. */ @@ -130,7 +149,8 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) */ ret = mprotect(mem, size, PROT_READ); if (ret) { - ksft_test_result_fail("mprotect() failed\n"); + ksft_print_msg("mprotect() failed (%s)\n", strerror(errno)); + result = KSFT_FAIL; goto munmap; } /* FALLTHROUGH */ @@ -143,18 +163,20 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) type == TEST_TYPE_RW_FAST; if (gup_fd < 0) { - ksft_test_result_skip("gup_test not available\n"); + ksft_print_msg("gup_test not available\n"); + result = KSFT_SKIP; break; } if (rw && shared && fs_is_unknown(fs_type)) { - ksft_test_result_skip("Unknown filesystem\n"); + ksft_print_msg("Unknown filesystem\n"); + result = KSFT_SKIP; return; } /* * R/O pinning or pinning in a private mapping is always * expected to work. Otherwise, we expect long-term R/W pinning - * to only succeed for special fielesystems. + * to only succeed for special filesystems. 
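
The PIN_LONGTERM_TEST_* ioctls below are served by the CONFIG_GUP_TEST debugfs file, which the test opens earlier as gup_fd. Roughly, the plumbing looks like this; the struct layout and ioctl numbers follow mm/gup_test.h as an assumption, and in-tree code includes the real header instead:

#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/types.h>

/* Mirrors mm/gup_test.h (assumed layout; include the real header in-tree). */
struct pin_longterm_test {
	__u64 addr;
	__u64 size;
	__u32 flags;
};
#define PIN_LONGTERM_TEST_START _IOW('g', 21, struct pin_longterm_test)
#define PIN_LONGTERM_TEST_STOP  _IO('g', 22)
#define PIN_LONGTERM_TEST_FLAG_USE_WRITE 1

/* Attempt a long-term pin of [mem, mem + size); returns the ioctl result. */
static int pin_range_longterm(void *mem, __u64 size, int writable)
{
	struct pin_longterm_test args = {
		.addr = (__u64)(unsigned long)mem,
		.size = size,
		.flags = writable ? PIN_LONGTERM_TEST_FLAG_USE_WRITE : 0,
	};
	int gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
	int ret;

	if (gup_fd < 0)
		return -1;
	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
	if (!ret)
		ioctl(gup_fd, PIN_LONGTERM_TEST_STOP);
	close(gup_fd);
	return ret;
}
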
*/ should_work = !shared || !rw || fs_supports_writable_longterm_pinning(fs_type); @@ -165,25 +187,35 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) args.flags |= rw ? PIN_LONGTERM_TEST_FLAG_USE_WRITE : 0; ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args); if (ret && errno == EINVAL) { - ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n"); + ksft_print_msg("PIN_LONGTERM_TEST_START failed (EINVAL)\n"); + result = KSFT_SKIP; break; } else if (ret && errno == EFAULT) { - ksft_test_result(!should_work, "Should have failed\n"); + if (should_work) + result = KSFT_FAIL; + else + result = KSFT_PASS; break; } else if (ret) { - ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n"); + ksft_print_msg("PIN_LONGTERM_TEST_START failed (%s)\n", + strerror(errno)); + result = KSFT_FAIL; break; } if (ioctl(gup_fd, PIN_LONGTERM_TEST_STOP)) - ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n"); + ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed (%s)\n", + strerror(errno)); /* * TODO: if the kernel ever supports long-term R/W pinning on * some previously unsupported filesystems, we might want to * perform some additional tests for possible data corruptions. */ - ksft_test_result(should_work, "Should have worked\n"); + if (should_work) + result = KSFT_PASS; + else + result = KSFT_FAIL; break; } #ifdef LOCAL_CONFIG_HAVE_LIBURING @@ -193,8 +225,9 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) /* io_uring always pins pages writable. */ if (shared && fs_is_unknown(fs_type)) { - ksft_test_result_skip("Unknown filesystem\n"); - return; + ksft_print_msg("Unknown filesystem\n"); + result = KSFT_SKIP; + goto report; } should_work = !shared || fs_supports_writable_longterm_pinning(fs_type); @@ -202,7 +235,9 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) /* Skip on errors, as we might just lack kernel support. */ ret = io_uring_queue_init(1, &ring, 0); if (ret < 0) { - ksft_test_result_skip("io_uring_queue_init() failed\n"); + ksft_print_msg("io_uring_queue_init() failed (%s)\n", + strerror(-ret)); + result = KSFT_SKIP; break; } /* @@ -215,15 +250,28 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) /* Only new kernels return EFAULT. */ if (ret && (errno == ENOSPC || errno == EOPNOTSUPP || errno == EFAULT)) { - ksft_test_result(!should_work, "Should have failed\n"); + if (should_work) { + ksft_print_msg("Should have failed (%s)\n", + strerror(errno)); + result = KSFT_FAIL; + } else { + result = KSFT_PASS; + } } else if (ret) { /* * We might just lack support or have insufficient * MEMLOCK limits. */ - ksft_test_result_skip("io_uring_register_buffers() failed\n"); + ksft_print_msg("io_uring_register_buffers() failed (%s)\n", + strerror(-ret)); + result = KSFT_SKIP; } else { - ksft_test_result(should_work, "Should have worked\n"); + if (should_work) { + result = KSFT_PASS; + } else { + ksft_print_msg("Should have worked\n"); + result = KSFT_FAIL; + } io_uring_unregister_buffers(&ring); } @@ -237,6 +285,8 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared) munmap: munmap(mem, size); +report: + log_test_result(result); } typedef void (*test_fn)(int fd, size_t size); @@ -245,11 +295,12 @@ static void run_with_memfd(test_fn fn, const char *desc) { int fd; - ksft_print_msg("[RUN] %s ... with memfd\n", desc); + log_test_start("%s ... 
with memfd", desc); fd = memfd_create("test", 0); if (fd < 0) { - ksft_test_result_fail("memfd_create() failed\n"); + ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno)); + log_test_result(KSFT_SKIP); return; } @@ -262,23 +313,23 @@ static void run_with_tmpfile(test_fn fn, const char *desc) FILE *file; int fd; - ksft_print_msg("[RUN] %s ... with tmpfile\n", desc); + log_test_start("%s ... with tmpfile", desc); file = tmpfile(); if (!file) { - ksft_test_result_fail("tmpfile() failed\n"); - return; - } - - fd = fileno(file); - if (fd < 0) { - ksft_test_result_fail("fileno() failed\n"); - goto close; + ksft_print_msg("tmpfile() failed (%s)\n", strerror(errno)); + fd = -1; + } else { + fd = fileno(file); + if (fd < 0) { + ksft_print_msg("fileno() failed (%s)\n", strerror(errno)); + } } fn(fd, pagesize); -close: - fclose(file); + + if (file) + fclose(file); } static void run_with_local_tmpfile(test_fn fn, const char *desc) @@ -286,22 +337,22 @@ static void run_with_local_tmpfile(test_fn fn, const char *desc) char filename[] = __FILE__"_tmpfile_XXXXXX"; int fd; - ksft_print_msg("[RUN] %s ... with local tmpfile\n", desc); + log_test_start("%s ... with local tmpfile", desc); fd = mkstemp(filename); - if (fd < 0) { - ksft_test_result_fail("mkstemp() failed\n"); - return; - } + if (fd < 0) + ksft_print_msg("mkstemp() failed (%s)\n", strerror(errno)); if (unlink(filename)) { - ksft_test_result_fail("unlink() failed\n"); - goto close; + ksft_print_msg("unlink() failed (%s)\n", strerror(errno)); + close(fd); + fd = -1; } fn(fd, pagesize); -close: - close(fd); + + if (fd >= 0) + close(fd); } static void run_with_memfd_hugetlb(test_fn fn, const char *desc, @@ -310,14 +361,15 @@ static void run_with_memfd_hugetlb(test_fn fn, const char *desc, int flags = MFD_HUGETLB; int fd; - ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc, + log_test_start("%s ... 
with memfd hugetlb (%zu kB)", desc, hugetlbsize / 1024); flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT; fd = memfd_create("test", flags); if (fd < 0) { - ksft_test_result_skip("memfd_create() failed\n"); + ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno)); + log_test_result(KSFT_SKIP); return; } @@ -446,7 +498,7 @@ static int tests_per_test_case(void) int main(int argc, char **argv) { - int i, err; + int i; pagesize = getpagesize(); nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes, @@ -460,9 +512,5 @@ int main(int argc, char **argv) for (i = 0; i < ARRAY_SIZE(test_cases); i++) run_test_case(&test_cases[i]); - err = ksft_get_fail_cnt(); - if (err) - ksft_exit_fail_msg("%d out of %d tests failed\n", - err, ksft_test_num()); - ksft_exit_pass(); + ksft_finished(); } diff --git a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh index 11f9bbe7dc22..0dd31892ff67 100755 --- a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh +++ b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh @@ -23,7 +23,7 @@ fi if [[ $cgroup2 ]]; then CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk '{print $3}') if [[ -z "$CGROUP_ROOT" ]]; then - CGROUP_ROOT=/dev/cgroup/memory + CGROUP_ROOT=$(mktemp -d) mount -t cgroup2 none $CGROUP_ROOT do_umount=1 fi @@ -36,7 +36,7 @@ else do_umount=1 fi fi -MNT='/mnt/huge/' +MNT='/mnt/huge' function get_machine_hugepage_size() { hpz=$(grep -i hugepagesize /proc/meminfo) @@ -56,10 +56,45 @@ function cleanup() { rmdir "$CGROUP_ROOT"/a/b 2>/dev/null rmdir "$CGROUP_ROOT"/a 2>/dev/null rmdir "$CGROUP_ROOT"/test1 2>/dev/null - echo 0 >/proc/sys/vm/nr_hugepages + echo $nr_hugepgs >/proc/sys/vm/nr_hugepages set -e } +function assert_with_retry() { + local actual_path="$1" + local expected="$2" + local tolerance=$((7 * 1024 * 1024)) + local timeout=20 + local interval=1 + local start_time + local now + local elapsed + local actual + + start_time=$(date +%s) + + while true; do + actual="$(cat "$actual_path")" + + if [[ $actual -ge $(($expected - $tolerance)) ]] && + [[ $actual -le $(($expected + $tolerance)) ]]; then + return 0 + fi + + now=$(date +%s) + elapsed=$((now - start_time)) + + if [[ $elapsed -ge $timeout ]]; then + echo "actual = $((${actual%% *} / 1024 / 1024)) MB" + echo "expected = $((${expected%% *} / 1024 / 1024)) MB" + cleanup + exit 1 + fi + + sleep $interval + done +} + function assert_state() { local expected_a="$1" local expected_a_hugetlb="$2" @@ -70,58 +105,13 @@ function assert_state() { expected_b="$3" expected_b_hugetlb="$4" fi - local tolerance=$((5 * 1024 * 1024)) - - local actual_a - actual_a="$(cat "$CGROUP_ROOT"/a/memory.$usage_file)" - if [[ $actual_a -lt $(($expected_a - $tolerance)) ]] || - [[ $actual_a -gt $(($expected_a + $tolerance)) ]]; then - echo actual a = $((${actual_a%% *} / 1024 / 1024)) MB - echo expected a = $((${expected_a%% *} / 1024 / 1024)) MB - echo fail - - cleanup - exit 1 - fi - local actual_a_hugetlb - actual_a_hugetlb="$(cat "$CGROUP_ROOT"/a/hugetlb.${MB}MB.$usage_file)" - if [[ $actual_a_hugetlb -lt $(($expected_a_hugetlb - $tolerance)) ]] || - [[ $actual_a_hugetlb -gt $(($expected_a_hugetlb + $tolerance)) ]]; then - echo actual a hugetlb = $((${actual_a_hugetlb%% *} / 1024 / 1024)) MB - echo expected a hugetlb = $((${expected_a_hugetlb%% *} / 1024 / 1024)) MB - echo fail + assert_with_retry "$CGROUP_ROOT/a/memory.$usage_file" "$expected_a" + assert_with_retry "$CGROUP_ROOT/a/hugetlb.${MB}MB.$usage_file" "$expected_a_hugetlb" - cleanup - 
exit 1 - fi - - if [[ -z "$expected_b" || -z "$expected_b_hugetlb" ]]; then - return - fi - - local actual_b - actual_b="$(cat "$CGROUP_ROOT"/a/b/memory.$usage_file)" - if [[ $actual_b -lt $(($expected_b - $tolerance)) ]] || - [[ $actual_b -gt $(($expected_b + $tolerance)) ]]; then - echo actual b = $((${actual_b%% *} / 1024 / 1024)) MB - echo expected b = $((${expected_b%% *} / 1024 / 1024)) MB - echo fail - - cleanup - exit 1 - fi - - local actual_b_hugetlb - actual_b_hugetlb="$(cat "$CGROUP_ROOT"/a/b/hugetlb.${MB}MB.$usage_file)" - if [[ $actual_b_hugetlb -lt $(($expected_b_hugetlb - $tolerance)) ]] || - [[ $actual_b_hugetlb -gt $(($expected_b_hugetlb + $tolerance)) ]]; then - echo actual b hugetlb = $((${actual_b_hugetlb%% *} / 1024 / 1024)) MB - echo expected b hugetlb = $((${expected_b_hugetlb%% *} / 1024 / 1024)) MB - echo fail - - cleanup - exit 1 + if [[ -n "$expected_b" && -n "$expected_b_hugetlb" ]]; then + assert_with_retry "$CGROUP_ROOT/a/b/memory.$usage_file" "$expected_b" + assert_with_retry "$CGROUP_ROOT/a/b/hugetlb.${MB}MB.$usage_file" "$expected_b_hugetlb" fi } @@ -175,7 +165,6 @@ size=$((${MB} * 1024 * 1024 * 25)) # 50MB = 25 * 2MB hugepages. cleanup echo -echo echo Test charge, rmdir, uncharge setup echo mkdir @@ -195,7 +184,6 @@ cleanup echo done echo -echo if [[ ! $cgroup2 ]]; then echo "Test parent and child hugetlb usage" setup @@ -212,7 +200,6 @@ if [[ ! $cgroup2 ]]; then assert_state 0 $(($size * 2)) 0 $size rmdir "$CGROUP_ROOT"/a/b - sleep 5 echo Assert memory reparent correctly. assert_state 0 $(($size * 2)) @@ -225,7 +212,6 @@ if [[ ! $cgroup2 ]]; then fi echo -echo echo "Test child only hugetlb usage" echo setup setup diff --git a/tools/testing/selftests/mm/ksm_tests.c b/tools/testing/selftests/mm/ksm_tests.c index b748c48908d9..e80deac1436b 100644 --- a/tools/testing/selftests/mm/ksm_tests.c +++ b/tools/testing/selftests/mm/ksm_tests.c @@ -58,40 +58,12 @@ int debug; static int ksm_write_sysfs(const char *file_path, unsigned long val) { - FILE *f = fopen(file_path, "w"); - - if (!f) { - fprintf(stderr, "f %s\n", file_path); - perror("fopen"); - return 1; - } - if (fprintf(f, "%lu", val) < 0) { - perror("fprintf"); - fclose(f); - return 1; - } - fclose(f); - - return 0; + return write_sysfs(file_path, val); } static int ksm_read_sysfs(const char *file_path, unsigned long *val) { - FILE *f = fopen(file_path, "r"); - - if (!f) { - fprintf(stderr, "f %s\n", file_path); - perror("fopen"); - return 1; - } - if (fscanf(f, "%lu", val) != 1) { - perror("fscanf"); - fclose(f); - return 1; - } - fclose(f); - - return 0; + return read_sysfs(file_path, val); } static void ksm_print_sysfs(void) @@ -776,7 +748,7 @@ err_out: int main(int argc, char *argv[]) { - int ret, opt; + int ret = 0, opt; int prot = 0; int ksm_scan_limit_sec = KSM_SCAN_LIMIT_SEC_DEFAULT; int merge_type = KSM_MERGE_TYPE_DEFAULT; diff --git a/tools/testing/selftests/mm/madv_populate.c b/tools/testing/selftests/mm/madv_populate.c index ef7d911da13e..b6fabd5c27ed 100644 --- a/tools/testing/selftests/mm/madv_populate.c +++ b/tools/testing/selftests/mm/madv_populate.c @@ -172,12 +172,12 @@ static void test_populate_read(void) if (addr == MAP_FAILED) ksft_exit_fail_msg("mmap failed\n"); ksft_test_result(range_is_not_populated(addr, SIZE), - "range initially not populated\n"); + "read range initially not populated\n"); ret = madvise(addr, SIZE, MADV_POPULATE_READ); ksft_test_result(!ret, "MADV_POPULATE_READ\n"); ksft_test_result(range_is_populated(addr, SIZE), - "range is populated\n"); + "read range is 
populated\n"); munmap(addr, SIZE); } @@ -194,12 +194,12 @@ static void test_populate_write(void) if (addr == MAP_FAILED) ksft_exit_fail_msg("mmap failed\n"); ksft_test_result(range_is_not_populated(addr, SIZE), - "range initially not populated\n"); + "write range initially not populated\n"); ret = madvise(addr, SIZE, MADV_POPULATE_WRITE); ksft_test_result(!ret, "MADV_POPULATE_WRITE\n"); ksft_test_result(range_is_populated(addr, SIZE), - "range is populated\n"); + "write range is populated\n"); munmap(addr, SIZE); } @@ -247,19 +247,19 @@ static void test_softdirty(void) /* Clear any softdirty bits. */ clear_softdirty(); ksft_test_result(range_is_not_softdirty(addr, SIZE), - "range is not softdirty\n"); + "cleared range is not softdirty\n"); /* Populating READ should set softdirty. */ ret = madvise(addr, SIZE, MADV_POPULATE_READ); - ksft_test_result(!ret, "MADV_POPULATE_READ\n"); + ksft_test_result(!ret, "softdirty MADV_POPULATE_READ\n"); ksft_test_result(range_is_not_softdirty(addr, SIZE), - "range is not softdirty\n"); + "range is not softdirty after MADV_POPULATE_READ\n"); /* Populating WRITE should set softdirty. */ ret = madvise(addr, SIZE, MADV_POPULATE_WRITE); - ksft_test_result(!ret, "MADV_POPULATE_WRITE\n"); + ksft_test_result(!ret, "softdirty MADV_POPULATE_WRITE\n"); ksft_test_result(range_is_softdirty(addr, SIZE), - "range is softdirty\n"); + "range is softdirty after MADV_POPULATE_WRITE \n"); munmap(addr, SIZE); } diff --git a/tools/testing/selftests/mm/map_fixed_noreplace.c b/tools/testing/selftests/mm/map_fixed_noreplace.c index d53de2486080..1e9980b8993c 100644 --- a/tools/testing/selftests/mm/map_fixed_noreplace.c +++ b/tools/testing/selftests/mm/map_fixed_noreplace.c @@ -96,7 +96,7 @@ int main(void) ksft_exit_fail_msg("Error:1: mmap() succeeded when it shouldn't have\n"); } ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); - ksft_test_result_pass("mmap() 5*PAGE_SIZE at base\n"); + ksft_test_result_pass("Second mmap() 5*PAGE_SIZE at base\n"); /* * Second mapping contained within first: diff --git a/tools/testing/selftests/mm/map_populate.c b/tools/testing/selftests/mm/map_populate.c index 5c8a53869b1b..9df2636c829b 100644 --- a/tools/testing/selftests/mm/map_populate.c +++ b/tools/testing/selftests/mm/map_populate.c @@ -18,6 +18,8 @@ #include <unistd.h> #include "../kselftest.h" +#include "vm_util.h" + #define MMAP_SZ 4096 #define BUG_ON(condition, description) \ @@ -87,6 +89,9 @@ int main(int argc, char **argv) BUG_ON(!ftmp, "tmpfile()"); ret = ftruncate(fileno(ftmp), MMAP_SZ); + if (ret < 0 && errno == ENOENT) { + skip_test_dodgy_fs("ftruncate()"); + } BUG_ON(ret, "ftruncate()"); smap = mmap(0, MMAP_SZ, PROT_READ | PROT_WRITE, diff --git a/tools/testing/selftests/mm/merge.c b/tools/testing/selftests/mm/merge.c new file mode 100644 index 000000000000..cc26480098ae --- /dev/null +++ b/tools/testing/selftests/mm/merge.c @@ -0,0 +1,501 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#define _GNU_SOURCE +#include "../kselftest_harness.h" +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/mman.h> +#include <sys/syscall.h> +#include <sys/wait.h> +#include <linux/perf_event.h> +#include "vm_util.h" + +FIXTURE(merge) +{ + unsigned int page_size; + char *carveout; + struct procmap_fd procmap; +}; + +FIXTURE_SETUP(merge) +{ + self->page_size = psize(); + /* Carve out PROT_NONE region to map over. 
*/ + self->carveout = mmap(NULL, 12 * self->page_size, PROT_NONE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(self->carveout, MAP_FAILED); + /* Setup PROCMAP_QUERY interface. */ + ASSERT_EQ(open_self_procmap(&self->procmap), 0); +} + +FIXTURE_TEARDOWN(merge) +{ + ASSERT_EQ(munmap(self->carveout, 12 * self->page_size), 0); + ASSERT_EQ(close_procmap(&self->procmap), 0); +} + +TEST_F(merge, mprotect_unfaulted_left) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr; + + /* + * Map 10 pages of R/W memory within. MAP_NORESERVE so we don't hit + * merge failure due to lack of VM_ACCOUNT flag by mistake. + * + * |-----------------------| + * | unfaulted | + * |-----------------------| + */ + ptr = mmap(&carveout[page_size], 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + /* + * Now make the first 5 pages read-only, splitting the VMA: + * + * RO RW + * |-----------|-----------| + * | unfaulted | unfaulted | + * |-----------|-----------| + */ + ASSERT_EQ(mprotect(ptr, 5 * page_size, PROT_READ), 0); + /* + * Fault in the first of the last 5 pages so it gets an anon_vma and + * thus the whole VMA becomes 'faulted': + * + * RO RW + * |-----------|-----------| + * | unfaulted | faulted | + * |-----------|-----------| + */ + ptr[5 * page_size] = 'x'; + /* + * Now mprotect() the RW region read-only, we should merge (though for + * ~15 years we did not! :): + * + * RO + * |-----------------------| + * | faulted | + * |-----------------------| + */ + ASSERT_EQ(mprotect(&ptr[5 * page_size], 5 * page_size, PROT_READ), 0); + + /* Assert that the merge succeeded using PROCMAP_QUERY. */ + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 10 * page_size); +} + +TEST_F(merge, mprotect_unfaulted_right) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr; + + /* + * |-----------------------| + * | unfaulted | + * |-----------------------| + */ + ptr = mmap(&carveout[page_size], 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + /* + * Now make the last 5 pages read-only, splitting the VMA: + * + * RW RO + * |-----------|-----------| + * | unfaulted | unfaulted | + * |-----------|-----------| + */ + ASSERT_EQ(mprotect(&ptr[5 * page_size], 5 * page_size, PROT_READ), 0); + /* + * Fault in the first of the first 5 pages so it gets an anon_vma and + * thus the whole VMA becomes 'faulted': + * + * RW RO + * |-----------|-----------| + * | faulted | unfaulted | + * |-----------|-----------| + */ + ptr[0] = 'x'; + /* + * Now mprotect() the RW region read-only, we should merge: + * + * RO + * |-----------------------| + * | faulted | + * |-----------------------| + */ + ASSERT_EQ(mprotect(ptr, 5 * page_size, PROT_READ), 0); + + /* Assert that the merge succeeded using PROCMAP_QUERY. 
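
find_vma_procmap() wraps the PROCMAP_QUERY ioctl on /proc/self/maps (uapi <linux/fs.h>). Outside the harness, looking up the VMA covering an address is roughly the following; field names follow the uapi struct procmap_query, but treat the details as an assumption to check against your kernel headers:

#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>

/* Fetch [vma_start, vma_end) of the VMA covering addr; 0 on success. */
static int query_vma(void *addr, __u64 *start, __u64 *end)
{
	struct procmap_query q;
	int fd = open("/proc/self/maps", O_RDONLY);
	int ret;

	if (fd < 0)
		return -1;

	memset(&q, 0, sizeof(q));
	q.size = sizeof(q);
	q.query_addr = (__u64)(unsigned long)addr;
	q.query_flags = 0;	/* covering VMA only, no next-VMA fallback */

	ret = ioctl(fd, PROCMAP_QUERY, &q);
	close(fd);
	if (ret)
		return -1;
	*start = q.vma_start;
	*end = q.vma_end;
	return 0;
}
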
*/ + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 10 * page_size); +} + +TEST_F(merge, mprotect_unfaulted_both) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr; + + /* + * |-----------------------| + * | unfaulted | + * |-----------------------| + */ + ptr = mmap(&carveout[2 * page_size], 9 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + /* + * Now make the first and last 3 pages read-only, splitting the VMA: + * + * RO RW RO + * |-----------|-----------|-----------| + * | unfaulted | unfaulted | unfaulted | + * |-----------|-----------|-----------| + */ + ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0); + ASSERT_EQ(mprotect(&ptr[6 * page_size], 3 * page_size, PROT_READ), 0); + /* + * Fault in the first of the middle 3 pages so it gets an anon_vma and + * thus the whole VMA becomes 'faulted': + * + * RO RW RO + * |-----------|-----------|-----------| + * | unfaulted | faulted | unfaulted | + * |-----------|-----------|-----------| + */ + ptr[3 * page_size] = 'x'; + /* + * Now mprotect() the RW region read-only, we should merge: + * + * RO + * |-----------------------| + * | faulted | + * |-----------------------| + */ + ASSERT_EQ(mprotect(&ptr[3 * page_size], 3 * page_size, PROT_READ), 0); + + /* Assert that the merge succeeded using PROCMAP_QUERY. */ + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 9 * page_size); +} + +TEST_F(merge, mprotect_faulted_left_unfaulted_right) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr; + + /* + * |-----------------------| + * | unfaulted | + * |-----------------------| + */ + ptr = mmap(&carveout[2 * page_size], 9 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + /* + * Now make the last 3 pages read-only, splitting the VMA: + * + * RW RO + * |-----------------------|-----------| + * | unfaulted | unfaulted | + * |-----------------------|-----------| + */ + ASSERT_EQ(mprotect(&ptr[6 * page_size], 3 * page_size, PROT_READ), 0); + /* + * Fault in the first of the first 6 pages so it gets an anon_vma and + * thus the whole VMA becomes 'faulted': + * + * RW RO + * |-----------------------|-----------| + * | unfaulted | unfaulted | + * |-----------------------|-----------| + */ + ptr[0] = 'x'; + /* + * Now make the first 3 pages read-only, splitting the VMA: + * + * RO RW RO + * |-----------|-----------|-----------| + * | faulted | faulted | unfaulted | + * |-----------|-----------|-----------| + */ + ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0); + /* + * Now mprotect() the RW region read-only, we should merge: + * + * RO + * |-----------------------| + * | faulted | + * |-----------------------| + */ + ASSERT_EQ(mprotect(&ptr[3 * page_size], 3 * page_size, PROT_READ), 0); + + /* Assert that the merge succeeded using PROCMAP_QUERY. 
*/ + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 9 * page_size); +} + +TEST_F(merge, mprotect_unfaulted_left_faulted_right) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + char *ptr; + + /* + * |-----------------------| + * | unfaulted | + * |-----------------------| + */ + ptr = mmap(&carveout[2 * page_size], 9 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + /* + * Now make the first 3 pages read-only, splitting the VMA: + * + * RO RW + * |-----------|-----------------------| + * | unfaulted | unfaulted | + * |-----------|-----------------------| + */ + ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0); + /* + * Fault in the first of the last 6 pages so it gets an anon_vma and + * thus the whole VMA becomes 'faulted': + * + * RO RW + * |-----------|-----------------------| + * | unfaulted | faulted | + * |-----------|-----------------------| + */ + ptr[3 * page_size] = 'x'; + /* + * Now make the last 3 pages read-only, splitting the VMA: + * + * RO RW RO + * |-----------|-----------|-----------| + * | unfaulted | faulted | faulted | + * |-----------|-----------|-----------| + */ + ASSERT_EQ(mprotect(&ptr[6 * page_size], 3 * page_size, PROT_READ), 0); + /* + * Now mprotect() the RW region read-only, we should merge: + * + * RO + * |-----------------------| + * | faulted | + * |-----------------------| + */ + ASSERT_EQ(mprotect(&ptr[3 * page_size], 3 * page_size, PROT_READ), 0); + + /* Assert that the merge succeeded using PROCMAP_QUERY. */ + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 9 * page_size); +} + +TEST_F(merge, forked_target_vma) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + pid_t pid; + char *ptr, *ptr2; + int i; + + /* + * |-----------| + * | unfaulted | + * |-----------| + */ + ptr = mmap(&carveout[page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* + * Fault in process. + * + * |-----------| + * | faulted | + * |-----------| + */ + ptr[0] = 'x'; + + pid = fork(); + ASSERT_NE(pid, -1); + + if (pid != 0) { + wait(NULL); + return; + } + + /* Child process below: */ + + /* Reopen for child. */ + ASSERT_EQ(close_procmap(&self->procmap), 0); + ASSERT_EQ(open_self_procmap(&self->procmap), 0); + + /* unCOWing everything does not cause the AVC to go away. */ + for (i = 0; i < 5 * page_size; i += page_size) + ptr[i] = 'x'; + + /* + * Map in adjacent VMA in child. + * + * forked + * |-----------|-----------| + * | faulted | unfaulted | + * |-----------|-----------| + * ptr ptr2 + */ + ptr2 = mmap(&ptr[5 * page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + + /* Make sure not merged. 
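
A cruder way to observe merge versus no-merge, without the PROCMAP_QUERY plumbing, is to count how many /proc/self/maps entries intersect the range; a diagnostic sketch, not what the harness does:

#include <stdio.h>

/* Count /proc/self/maps entries whose [start, end) intersects [lo, hi). */
static int count_vmas(unsigned long lo, unsigned long hi)
{
	FILE *fp = fopen("/proc/self/maps", "r");
	char line[512];
	int count = 0;

	if (!fp)
		return -1;
	while (fgets(line, sizeof(line), fp)) {
		unsigned long start, end;

		if (sscanf(line, "%lx-%lx", &start, &end) != 2)
			continue;
		if (start < hi && end > lo)
			count++;
	}
	fclose(fp);
	return count;	/* 1 means the range merged into a single VMA */
}
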
*/ + ASSERT_TRUE(find_vma_procmap(procmap, ptr)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 5 * page_size); +} + +TEST_F(merge, forked_source_vma) +{ + unsigned int page_size = self->page_size; + char *carveout = self->carveout; + struct procmap_fd *procmap = &self->procmap; + pid_t pid; + char *ptr, *ptr2; + int i; + + /* + * |-----------|------------| + * | unfaulted | <unmapped> | + * |-----------|------------| + */ + ptr = mmap(&carveout[page_size], 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* + * Fault in process. + * + * |-----------|------------| + * | faulted | <unmapped> | + * |-----------|------------| + */ + ptr[0] = 'x'; + + pid = fork(); + ASSERT_NE(pid, -1); + + if (pid != 0) { + wait(NULL); + return; + } + + /* Child process below: */ + + /* Reopen for child. */ + ASSERT_EQ(close_procmap(&self->procmap), 0); + ASSERT_EQ(open_self_procmap(&self->procmap), 0); + + /* unCOWing everything does not cause the AVC to go away. */ + for (i = 0; i < 5 * page_size; i += page_size) + ptr[i] = 'x'; + + /* + * Map in adjacent VMA in child, ptr2 after ptr, but incompatible. + * + * forked RW RWX + * |-----------|-----------| + * | faulted | unfaulted | + * |-----------|-----------| + * ptr ptr2 + */ + ptr2 = mmap(&carveout[6 * page_size], 5 * page_size, PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_ANON | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + + /* Make sure not merged. */ + ASSERT_TRUE(find_vma_procmap(procmap, ptr2)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr2); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr2 + 5 * page_size); + + /* + * Now mprotect forked region to RWX so it becomes the source for the + * merge to unfaulted region: + * + * forked RWX RWX + * |-----------|-----------| + * | faulted | unfaulted | + * |-----------|-----------| + * ptr ptr2 + * + * This should NOT result in a merge, as ptr was forked. + */ + ASSERT_EQ(mprotect(ptr, 5 * page_size, PROT_READ | PROT_WRITE | PROT_EXEC), 0); + /* Again, make sure not merged. 
*/ + ASSERT_TRUE(find_vma_procmap(procmap, ptr2)); + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr2); + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr2 + 5 * page_size); +} + +TEST_F(merge, handle_uprobe_upon_merged_vma) +{ + const size_t attr_sz = sizeof(struct perf_event_attr); + unsigned int page_size = self->page_size; + const char *probe_file = "./foo"; + char *carveout = self->carveout; + struct perf_event_attr attr; + unsigned long type; + void *ptr1, *ptr2; + int fd; + + fd = open(probe_file, O_RDWR|O_CREAT, 0600); + ASSERT_GE(fd, 0); + + ASSERT_EQ(ftruncate(fd, page_size), 0); + if (read_sysfs("/sys/bus/event_source/devices/uprobe/type", &type) != 0) { + SKIP(goto out, "Failed to read uprobe sysfs file, skipping"); + } + + memset(&attr, 0, attr_sz); + attr.size = attr_sz; + attr.type = type; + attr.config1 = (__u64)(long)probe_file; + attr.config2 = 0x0; + + ASSERT_GE(syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0), 0); + + ptr1 = mmap(&carveout[page_size], 10 * page_size, PROT_EXEC, + MAP_PRIVATE | MAP_FIXED, fd, 0); + ASSERT_NE(ptr1, MAP_FAILED); + + ptr2 = mremap(ptr1, page_size, 2 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr1 + 5 * page_size); + ASSERT_NE(ptr2, MAP_FAILED); + + ASSERT_NE(mremap(ptr2, page_size, page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr1), MAP_FAILED); + +out: + close(fd); + remove(probe_file); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/migration.c b/tools/testing/selftests/mm/migration.c index 64bcbb7151cf..1e3a595fbf01 100644 --- a/tools/testing/selftests/mm/migration.c +++ b/tools/testing/selftests/mm/migration.c @@ -204,4 +204,103 @@ TEST_F_TIMEOUT(migration, private_anon_thp, 2*RUNTIME) ASSERT_EQ(pthread_cancel(self->threads[i]), 0); } +/* + * migration test with shared anon THP page + */ + +TEST_F_TIMEOUT(migration, shared_anon_thp, 2*RUNTIME) +{ + pid_t pid; + uint64_t *ptr; + int i; + + if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0) + SKIP(return, "Not enough threads or NUMA nodes available"); + + ptr = mmap(NULL, 2 * TWOMEG, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + ptr = (uint64_t *) ALIGN((uintptr_t) ptr, TWOMEG); + ASSERT_EQ(madvise(ptr, TWOMEG, MADV_HUGEPAGE), 0); + + memset(ptr, 0xde, TWOMEG); + for (i = 0; i < self->nthreads - 1; i++) { + pid = fork(); + if (!pid) { + prctl(PR_SET_PDEATHSIG, SIGHUP); + /* Parent may have died before prctl so check now. 
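
The migrate() helper these tests call is defined earlier in migration.c and is not visible in this hunk; the underlying primitive is move_pages(2). A minimal equivalent might look like this (the helper shape is an assumption; MPOL_MF_MOVE_ALL requires CAP_SYS_NICE, and linking against libnuma provides the wrapper):

#define _GNU_SOURCE
#include <numaif.h>	/* move_pages(); link with -lnuma */
#include <unistd.h>

/* Move every page of [ptr, ptr + size) to NUMA node 'node'; 0 on success. */
static int migrate_range(void *ptr, unsigned long size, int node)
{
	long page = sysconf(_SC_PAGESIZE);
	unsigned long nr = size / page;
	void *pages[nr];
	int nodes[nr], status[nr];
	unsigned long i;

	for (i = 0; i < nr; i++) {
		pages[i] = (char *)ptr + i * page;
		nodes[i] = node;
		status[i] = -1;
	}
	/* pid 0 means this process; MOVE_ALL also migrates shared pages. */
	if (move_pages(0, nr, pages, nodes, status, MPOL_MF_MOVE_ALL))
		return -1;
	for (i = 0; i < nr; i++)
		if (status[i] != node)
			return -1;
	return 0;
}
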
*/ + if (getppid() == 1) + kill(getpid(), SIGHUP); + access_mem(ptr); + } else { + self->pids[i] = pid; + } + } + + ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0); + for (i = 0; i < self->nthreads - 1; i++) + ASSERT_EQ(kill(self->pids[i], SIGTERM), 0); +} + +/* + * migration test with private anon hugetlb page + */ +TEST_F_TIMEOUT(migration, private_anon_htlb, 2*RUNTIME) +{ + uint64_t *ptr; + int i; + + if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0) + SKIP(return, "Not enough threads or NUMA nodes available"); + + ptr = mmap(NULL, TWOMEG, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + memset(ptr, 0xde, TWOMEG); + for (i = 0; i < self->nthreads - 1; i++) + if (pthread_create(&self->threads[i], NULL, access_mem, ptr)) + perror("Couldn't create thread"); + + ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0); + for (i = 0; i < self->nthreads - 1; i++) + ASSERT_EQ(pthread_cancel(self->threads[i]), 0); +} + +/* + * migration test with shared anon hugetlb page + */ +TEST_F_TIMEOUT(migration, shared_anon_htlb, 2*RUNTIME) +{ + pid_t pid; + uint64_t *ptr; + int i; + + if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0) + SKIP(return, "Not enough threads or NUMA nodes available"); + + ptr = mmap(NULL, TWOMEG, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + memset(ptr, 0xde, TWOMEG); + for (i = 0; i < self->nthreads - 1; i++) { + pid = fork(); + if (!pid) { + prctl(PR_SET_PDEATHSIG, SIGHUP); + /* Parent may have died before prctl so check now. */ + if (getppid() == 1) + kill(getpid(), SIGHUP); + access_mem(ptr); + } else { + self->pids[i] = pid; + } + } + + ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0); + for (i = 0; i < self->nthreads - 1; i++) + ASSERT_EQ(kill(self->pids[i], SIGTERM), 0); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/mkdirty.c b/tools/testing/selftests/mm/mkdirty.c index b8a7efe9204e..09feeb453646 100644 --- a/tools/testing/selftests/mm/mkdirty.c +++ b/tools/testing/selftests/mm/mkdirty.c @@ -281,6 +281,7 @@ static void test_uffdio_copy(void) dst = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE|MAP_ANON, -1, 0); if (dst == MAP_FAILED) { ksft_test_result_fail("mmap() failed\n"); + free(src); return; } diff --git a/tools/testing/selftests/mm/mlock-random-test.c b/tools/testing/selftests/mm/mlock-random-test.c index 1cd80b0f76c3..b8d7e966f44c 100644 --- a/tools/testing/selftests/mm/mlock-random-test.c +++ b/tools/testing/selftests/mm/mlock-random-test.c @@ -161,9 +161,9 @@ static void test_mlock_within_limit(char *p, int alloc_size) MLOCK_ONFAULT); if (ret) - ksft_exit_fail_msg("%s() failure at |%p(%d)| mlock:|%p(%d)|\n", + ksft_exit_fail_msg("%s() failure (%s) at |%p(%d)| mlock:|%p(%d)|\n", is_mlock ? 
"mlock" : "mlock2", - p, alloc_size, + strerror(errno), p, alloc_size, p + start_offset, lock_size); } diff --git a/tools/testing/selftests/mm/mlock2-tests.c b/tools/testing/selftests/mm/mlock2-tests.c index 7f0d50fa361d..3e90ff37e336 100644 --- a/tools/testing/selftests/mm/mlock2-tests.c +++ b/tools/testing/selftests/mm/mlock2-tests.c @@ -196,7 +196,7 @@ static void test_mlock_lock(void) ksft_exit_fail_msg("munlock(): %s\n", strerror(errno)); } - ksft_test_result(!unlock_lock_check(map), "%s: Locked\n", __func__); + ksft_test_result(!unlock_lock_check(map), "%s: Unlocked\n", __func__); munmap(map, 2 * page_size); } diff --git a/tools/testing/selftests/mm/mlock2.h b/tools/testing/selftests/mm/mlock2.h index 4417eaa5cfb7..81e77fa41901 100644 --- a/tools/testing/selftests/mm/mlock2.h +++ b/tools/testing/selftests/mm/mlock2.h @@ -6,7 +6,13 @@ static int mlock2_(void *start, size_t len, int flags) { - return syscall(__NR_mlock2, start, len, flags); + int ret = syscall(__NR_mlock2, start, len, flags); + + if (ret) { + errno = ret; + return -1; + } + return 0; } static FILE *seek_to_smaps_entry(unsigned long addr) diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c index 5a3a9bcba640..bb84476a177f 100644 --- a/tools/testing/selftests/mm/mremap_test.c +++ b/tools/testing/selftests/mm/mremap_test.c @@ -34,7 +34,7 @@ struct config { unsigned long long dest_alignment; unsigned long long region_size; int overlapping; - int dest_preamble_size; + unsigned int dest_preamble_size; }; struct test { @@ -328,7 +328,7 @@ static void mremap_move_within_range(unsigned int pattern_seed, char *rand_addr) { char *test_name = "mremap mremap move within range"; void *src, *dest; - int i, success = 1; + unsigned int i, success = 1; size_t size = SIZE_MB(20); void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, @@ -384,7 +384,7 @@ out: static long long remap_region(struct config c, unsigned int threshold_mb, char *rand_addr) { - void *addr, *src_addr, *dest_addr, *dest_preamble_addr; + void *addr, *src_addr, *dest_addr, *dest_preamble_addr = NULL; unsigned long long t, d; struct timespec t_start = {0, 0}, t_end = {0, 0}; long long start_ns, end_ns, align_mask, ret, offset; @@ -569,7 +569,7 @@ static void mremap_move_1mb_from_start(unsigned int pattern_seed, { char *test_name = "mremap move 1mb from start at 1MB+256KB aligned src"; void *src = NULL, *dest = NULL; - int i, success = 1; + unsigned int i, success = 1; /* Config to reuse get_source_mapping() to do an aligned mmap. 
*/ struct config c = { @@ -636,7 +636,7 @@ out: static void run_mremap_test_case(struct test test_case, int *failures, unsigned int threshold_mb, - unsigned int pattern_seed, char *rand_addr) + char *rand_addr) { long long remap_time = remap_region(test_case.config, threshold_mb, rand_addr); @@ -708,7 +708,8 @@ static int parse_args(int argc, char **argv, unsigned int *threshold_mb, int main(int argc, char **argv) { int failures = 0; - int i, run_perf_tests; + unsigned int i; + int run_perf_tests; unsigned int threshold_mb = VALIDATION_DEFAULT_THRESHOLD; /* hard-coded test configs */ @@ -831,7 +832,7 @@ int main(int argc, char **argv) for (i = 0; i < ARRAY_SIZE(test_cases); i++) run_mremap_test_case(test_cases[i], &failures, threshold_mb, - pattern_seed, rand_addr); + rand_addr); maps_fp = fopen("/proc/self/maps", "r"); @@ -853,7 +854,7 @@ int main(int argc, char **argv) "mremap HAVE_MOVE_PMD/PUD optimization time comparison for 1GB region:"); for (i = 0; i < ARRAY_SIZE(perf_test_cases); i++) run_mremap_test_case(perf_test_cases[i], &failures, - threshold_mb, pattern_seed, + threshold_mb, rand_addr); } diff --git a/tools/testing/selftests/mm/mseal_test.c b/tools/testing/selftests/mm/mseal_test.c index 01675c412b2a..005f29c86484 100644 --- a/tools/testing/selftests/mm/mseal_test.c +++ b/tools/testing/selftests/mm/mseal_test.c @@ -218,7 +218,7 @@ bool seal_support(void) bool pkey_supported(void) { #if defined(__i386__) || defined(__x86_64__) /* arch */ - int pkey = sys_pkey_alloc(0, 0); + int pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED); if (pkey > 0) return true; @@ -802,7 +802,7 @@ static void test_seal_mprotect_partial_mprotect_tail(bool seal) } -static void test_seal_mprotect_two_vma_with_gap(bool seal) +static void test_seal_mprotect_two_vma_with_gap(void) { void *ptr; unsigned long page_size = getpagesize(); @@ -1671,7 +1671,7 @@ static void test_seal_discard_ro_anon_on_pkey(bool seal) setup_single_address_rw(size, &ptr); FAIL_TEST_IF_FALSE(ptr != (void *)-1); - pkey = sys_pkey_alloc(0, 0); + pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED); FAIL_TEST_IF_FALSE(pkey > 0); ret = sys_mprotect_pkey((void *)ptr, size, PROT_READ | PROT_WRITE, pkey); @@ -1683,7 +1683,7 @@ static void test_seal_discard_ro_anon_on_pkey(bool seal) } /* sealing doesn't take effect if PKRU allow write. 
*/ - set_pkey(pkey, 0); + set_pkey(pkey, PKEY_UNRESTRICTED); ret = sys_madvise(ptr, size, MADV_DONTNEED); FAIL_TEST_IF_FALSE(!ret); @@ -1864,7 +1864,7 @@ static void test_seal_madvise_nodiscard(bool seal) REPORT_TEST_PASS(); } -int main(int argc, char **argv) +int main(void) { bool test_seal = seal_support(); @@ -1913,8 +1913,8 @@ int main(int argc, char **argv) test_seal_mprotect_partial_mprotect(false); test_seal_mprotect_partial_mprotect(true); - test_seal_mprotect_two_vma_with_gap(false); - test_seal_mprotect_two_vma_with_gap(true); + test_seal_mprotect_two_vma_with_gap(); + test_seal_mprotect_two_vma_with_gap(); test_seal_mprotect_merge(false); test_seal_mprotect_merge(true); diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c index bcc73b4e805c..b07acc86f4f0 100644 --- a/tools/testing/selftests/mm/pagemap_ioctl.c +++ b/tools/testing/selftests/mm/pagemap_ioctl.c @@ -34,8 +34,8 @@ #define PAGEMAP "/proc/self/pagemap" int pagemap_fd; int uffd; -int page_size; -int hpage_size; +unsigned long page_size; +unsigned int hpage_size; const char *progname; #define LEN(region) ((region.end - region.start)/page_size) @@ -112,7 +112,7 @@ int init_uffd(void) return 0; } -int wp_init(void *lpBaseAddress, int dwRegionSize) +int wp_init(void *lpBaseAddress, long dwRegionSize) { struct uffdio_register uffdio_register; struct uffdio_writeprotect wp; @@ -136,7 +136,7 @@ int wp_init(void *lpBaseAddress, int dwRegionSize) return 0; } -int wp_free(void *lpBaseAddress, int dwRegionSize) +int wp_free(void *lpBaseAddress, long dwRegionSize) { struct uffdio_register uffdio_register; @@ -184,7 +184,7 @@ void *gethugetlb_mem(int size, int *shmid) int userfaultfd_tests(void) { - int mem_size, vec_size, written, num_pages = 16; + long mem_size, vec_size, written, num_pages = 16; char *mem, *vec; mem_size = num_pages * page_size; @@ -213,7 +213,7 @@ int userfaultfd_tests(void) written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, vec_size - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (written < 0) - ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", written, errno, strerror(errno)); ksft_test_result(written == 0, "%s all new pages must not be written (dirty)\n", __func__); @@ -235,7 +235,9 @@ int get_reads(struct page_region *vec, int vec_size) int sanity_tests_sd(void) { - int mem_size, vec_size, ret, ret2, ret3, i, num_pages = 1000, total_pages = 0; + unsigned long long mem_size, vec_size, i, total_pages = 0; + long ret, ret2, ret3; + int num_pages = 1000; int total_writes, total_reads, reads, count; struct page_region *vec, *vec2; char *mem, *m[2]; @@ -321,9 +323,9 @@ int sanity_tests_sd(void) ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); - ksft_test_result(ret == mem_size/(page_size * 2), + ksft_test_result((unsigned long long)ret == mem_size/(page_size * 2), "%s Repeated pattern of written and non-written pages\n", __func__); /* 4. 
Repeated pattern of written and non-written pages in parts */ @@ -331,21 +333,21 @@ int sanity_tests_sd(void) PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, num_pages/2 - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ret2 = pagemap_ioctl(mem, mem_size, vec, 2, 0, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret2 < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret2, errno, strerror(errno)); ret3 = pagemap_ioctl(mem, mem_size, vec, vec_size, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret3 < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret3, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret3, errno, strerror(errno)); ksft_test_result((ret + ret3) == num_pages/2 && ret2 == 2, - "%s Repeated pattern of written and non-written pages in parts %d %d %d\n", + "%s Repeated pattern of written and non-written pages in parts %ld %ld %ld\n", __func__, ret, ret3, ret2); /* 5. Repeated pattern of written and non-written pages max_pages */ @@ -357,13 +359,13 @@ int sanity_tests_sd(void) PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, num_pages/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ret2 = pagemap_ioctl(mem, mem_size, vec, vec_size, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret2 < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret2, errno, strerror(errno)); ksft_test_result(ret == num_pages/2 && ret2 == 1, "%s Repeated pattern of written and non-written pages max_pages\n", @@ -378,12 +380,12 @@ int sanity_tests_sd(void) PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ret2 = pagemap_ioctl(mem, mem_size, vec2, vec_size, 0, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret2 < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret2, errno, strerror(errno)); ksft_test_result(ret == 1 && LEN(vec[0]) == 2 && vec[0].start == (uintptr_t)(mem + page_size) && @@ -416,7 +418,7 @@ int sanity_tests_sd(void) ret = pagemap_ioctl(m[1], mem_size, vec, 1, 0, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && LEN(vec[0]) == mem_size/page_size, "%s Two regions\n", __func__); @@ -448,7 +450,7 @@ int sanity_tests_sd(void) PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); for (i = 0; i < mem_size/page_size; i += 2) mem[i * page_size]++; @@ -457,7 +459,7 @@ int sanity_tests_sd(void) PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, mem_size/(page_size*5), PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) - ksft_exit_fail_msg("error %d %d 
%s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); total_pages += ret; @@ -465,7 +467,7 @@ int sanity_tests_sd(void) PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, mem_size/(page_size*5), PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); total_pages += ret; @@ -473,7 +475,7 @@ int sanity_tests_sd(void) PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, mem_size/(page_size*5), PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); total_pages += ret; @@ -515,9 +517,9 @@ int sanity_tests_sd(void) vec_size, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); - if (ret > vec_size) + if ((unsigned long)ret > vec_size) break; reads = get_reads(vec, ret); @@ -554,63 +556,63 @@ int sanity_tests_sd(void) ret = pagemap_ioc(mem, 0, vec, vec_size, 0, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 0 && walk_end == (long)mem, "Walk_end: Same start and end address\n"); ret = pagemap_ioc(mem, 0, vec, vec_size, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 0 && walk_end == (long)mem, "Walk_end: Same start and end with WP\n"); ret = pagemap_ioc(mem, 0, vec, 0, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 0 && walk_end == (long)mem, "Walk_end: Same start and end with 0 output buffer\n"); ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size), "Walk_end: Big vec\n"); ret = pagemap_ioc(mem, mem_size, vec, 1, 0, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size), "Walk_end: vec of minimum length\n"); ret = pagemap_ioc(mem, mem_size, vec, 1, 0, vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size), "Walk_end: Max pages specified\n"); ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, vec_size/2, PAGE_IS_WRITTEN, 0, 0, 
PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size/2), "Walk_end: Half max pages\n"); ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, 1, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size), "Walk_end: 1 max page\n"); ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, -1, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size), "Walk_end: max pages\n"); @@ -621,49 +623,49 @@ int sanity_tests_sd(void) ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); - ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size), + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); + ksft_test_result((unsigned long)ret == vec_size/2 && walk_end == (long)(mem + mem_size), "Walk_end sparse: Big vec\n"); ret = pagemap_ioc(mem, mem_size, vec, 1, 0, 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size * 2), "Walk_end sparse: vec of minimum length\n"); ret = pagemap_ioc(mem, mem_size, vec, 1, 0, vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size * 2), "Walk_end sparse: Max pages specified\n"); ret = pagemap_ioc(mem, mem_size, vec, vec_size/2, 0, vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); - ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size), + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); + ksft_test_result((unsigned long)ret == vec_size/2 && walk_end == (long)(mem + mem_size), "Walk_end sparse: Max pages specified\n"); ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); - ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size), + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); + ksft_test_result((unsigned long)ret == vec_size/2 && walk_end == (long)(mem + mem_size), "Walk_end sparse: Max pages specified\n"); ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, vec_size/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); - ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size), + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); + 
ksft_test_result((unsigned long)ret == vec_size/2 && walk_end == (long)(mem + mem_size), "Walk_endsparse : Half max pages\n"); ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0, 1, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size * 2), "Walk_end: 1 max page\n"); @@ -674,9 +676,10 @@ int sanity_tests_sd(void) return 0; } -int base_tests(char *prefix, char *mem, int mem_size, int skip) +int base_tests(char *prefix, char *mem, unsigned long long mem_size, int skip) { - int vec_size, written; + unsigned long long vec_size; + int written; struct page_region *vec, *vec2; if (skip) { @@ -799,8 +802,8 @@ int hpage_unit_tests(void) char *map; int ret, ret2; size_t num_pages = 10; - int map_size = hpage_size * num_pages; - int vec_size = map_size/page_size; + unsigned long long map_size = hpage_size * num_pages; + unsigned long long vec_size = map_size/page_size; struct page_region *vec, *vec2; vec = malloc(sizeof(struct page_region) * vec_size); @@ -992,7 +995,7 @@ int unmapped_region_tests(void) { void *start = (void *)0x10000000; int written, len = 0x00040000; - int vec_size = len / page_size; + long vec_size = len / page_size; struct page_region *vec = malloc(sizeof(struct page_region) * vec_size); /* 1. Get written pages */ @@ -1047,7 +1050,8 @@ static void test_simple(void) int sanity_tests(void) { - int mem_size, vec_size, ret, fd, i, buf_size; + unsigned long long mem_size, vec_size; + long ret, fd, i, buf_size; struct page_region *vec; char *mem, *fmem; struct stat sbuf; @@ -1156,7 +1160,7 @@ int sanity_tests(void) ret = stat(progname, &sbuf); if (ret < 0) - ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno)); + ksft_exit_fail_msg("error %ld %d %s\n", ret, errno, strerror(errno)); fmem = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0); if (fmem == MAP_FAILED) @@ -1312,7 +1316,9 @@ static ssize_t get_dirty_pages_reset(char *mem, unsigned int count, { struct pm_scan_arg arg = {0}; struct page_region rgns[256]; - int i, j, cnt, ret; + unsigned long long i, j; + long ret; + int cnt; arg.size = sizeof(struct pm_scan_arg); arg.start = (uintptr_t)mem; @@ -1330,7 +1336,7 @@ static ssize_t get_dirty_pages_reset(char *mem, unsigned int count, ksft_exit_fail_msg("ioctl failed\n"); cnt = 0; - for (i = 0; i < ret; ++i) { + for (i = 0; i < (unsigned long)ret; ++i) { if (rgns[i].categories != PAGE_IS_WRITTEN) ksft_exit_fail_msg("wrong flags\n"); @@ -1384,9 +1390,10 @@ void *thread_proc(void *mem) static void transact_test(int page_size) { unsigned int i, count, extra_pages; + unsigned int c; pthread_t th; char *mem; - int ret, c; + int ret; if (pthread_barrier_init(&start_barrier, NULL, nthreads + 1)) ksft_exit_fail_msg("pthread_barrier_init\n"); @@ -1405,9 +1412,9 @@ static void transact_test(int page_size) memset(mem, 0, 0x1000 * nthreads * pages_per_thread); count = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size); - ksft_test_result(count > 0, "%s count %d\n", __func__, count); + ksft_test_result(count > 0, "%s count %u\n", __func__, count); count = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size); - ksft_test_result(count == 0, "%s count %d\n", __func__, count); + ksft_test_result(count == 0, "%s count %u\n", __func__, count); finish = 0; for (i = 0; i < nthreads; ++i) @@ -1429,7 +1436,7 @@ static void 
transact_test(int page_size) ksft_exit_fail_msg("pthread_barrier_wait\n"); if (count > nthreads * access_per_thread) - ksft_exit_fail_msg("Too big count %d expected %d, iter %d\n", + ksft_exit_fail_msg("Too big count %u expected %u, iter %u\n", count, nthreads * access_per_thread, i); c = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size); @@ -1454,7 +1461,7 @@ static void transact_test(int page_size) * access and application gets page fault again for the same write. */ if (count < nthreads * access_per_thread) { - ksft_test_result_fail("Lost update, iter %d, %d vs %d.\n", i, count, + ksft_test_result_fail("Lost update, iter %u, %u vs %u.\n", i, count, nthreads * access_per_thread); return; } @@ -1467,15 +1474,16 @@ static void transact_test(int page_size) finish = 1; pthread_barrier_wait(&end_barrier); - ksft_test_result_pass("%s Extra pages %u (%.1lf%%), extra thread faults %d.\n", __func__, + ksft_test_result_pass("%s Extra pages %u (%.1lf%%), extra thread faults %u.\n", __func__, extra_pages, 100.0 * extra_pages / (iter_count * nthreads * access_per_thread), extra_thread_faults); } -int main(int argc, char *argv[]) +int main(int __attribute__((unused)) argc, char *argv[]) { - int mem_size, shmid, buf_size, fd, i, ret; + int shmid, buf_size, fd, i, ret; + unsigned long long mem_size; char *mem, *map, *fmem; struct stat sbuf; diff --git a/tools/testing/selftests/mm/pfnmap.c b/tools/testing/selftests/mm/pfnmap.c new file mode 100644 index 000000000000..866ac023baf5 --- /dev/null +++ b/tools/testing/selftests/mm/pfnmap.c @@ -0,0 +1,249 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Basic VM_PFNMAP tests relying on mmap() of '/dev/mem' + * + * Copyright 2025, Red Hat, Inc. + * + * Author(s): David Hildenbrand <david@redhat.com> + */ +#define _GNU_SOURCE +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <unistd.h> +#include <errno.h> +#include <stdio.h> +#include <ctype.h> +#include <fcntl.h> +#include <signal.h> +#include <setjmp.h> +#include <linux/mman.h> +#include <sys/mman.h> +#include <sys/wait.h> + +#include "../kselftest_harness.h" +#include "vm_util.h" + +static sigjmp_buf sigjmp_buf_env; + +static void signal_handler(int sig) +{ + siglongjmp(sigjmp_buf_env, -EFAULT); +} + +static int test_read_access(char *addr, size_t size, size_t pagesize) +{ + size_t offs; + int ret; + + if (signal(SIGSEGV, signal_handler) == SIG_ERR) + return -EINVAL; + + ret = sigsetjmp(sigjmp_buf_env, 1); + if (!ret) { + for (offs = 0; offs < size; offs += pagesize) + /* Force a read that the compiler cannot optimize out. */ + *((volatile char *)(addr + offs)); + } + if (signal(SIGSEGV, SIG_DFL) == SIG_ERR) + return -EINVAL; + + return ret; +} + +static int find_ram_target(off_t *phys_addr, + unsigned long long pagesize) +{ + unsigned long long start, end; + char line[80], *end_ptr; + FILE *file; + + /* Search /proc/iomem for the first suitable "System RAM" range. */ + file = fopen("/proc/iomem", "r"); + if (!file) + return -errno; + + while (fgets(line, sizeof(line), file)) { + /* Ignore any child nodes. */ + if (!isalnum(line[0])) + continue; + + if (!strstr(line, "System RAM\n")) + continue; + + start = strtoull(line, &end_ptr, 16); + /* Skip over the "-" */ + end_ptr++; + /* Make end "exclusive". */ + end = strtoull(end_ptr, NULL, 16) + 1; + + /* Actual addresses are not exported */ + if (!start && !end) + break; + + /* We need full pages. 
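The test_read_access() helper in the new pfnmap.c uses a classic fault-probe pattern; a self-contained sketch of that pattern with hypothetical helper names:

#define _GNU_SOURCE
#include <setjmp.h>
#include <signal.h>
#include <stdio.h>

static sigjmp_buf env;

static void handler(int sig)
{
	siglongjmp(env, 1);
}

/* Returns 0 if *addr is readable, nonzero if the read faulted. */
static int probe_read(const char *addr)
{
	int faulted;

	signal(SIGSEGV, handler);
	faulted = sigsetjmp(env, 1);	/* save the signal mask too */
	if (!faulted)
		*(volatile const char *)addr;	/* may SIGSEGV */
	signal(SIGSEGV, SIG_DFL);
	return faulted;
}

int main(void)
{
	char ok = 0;

	printf("readable byte: %d\n", probe_read(&ok));
	printf("NULL page: %d\n", probe_read(NULL));
	return 0;
}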
*/ + start = (start + pagesize - 1) & ~(pagesize - 1); + end &= ~(pagesize - 1); + + if (start != (off_t)start) + break; + + /* We need two pages. */ + if (end > start + 2 * pagesize) { + fclose(file); + *phys_addr = start; + return 0; + } + } + return -ENOENT; +} + +FIXTURE(pfnmap) +{ + off_t phys_addr; + size_t pagesize; + int dev_mem_fd; + char *addr1; + size_t size1; + char *addr2; + size_t size2; +}; + +FIXTURE_SETUP(pfnmap) +{ + self->pagesize = getpagesize(); + + /* We'll require two physical pages throughout our tests ... */ + if (find_ram_target(&self->phys_addr, self->pagesize)) + SKIP(return, "Cannot find ram target in '/proc/iomem'\n"); + + self->dev_mem_fd = open("/dev/mem", O_RDONLY); + if (self->dev_mem_fd < 0) + SKIP(return, "Cannot open '/dev/mem'\n"); + + self->size1 = self->pagesize * 2; + self->addr1 = mmap(NULL, self->size1, PROT_READ, MAP_SHARED, + self->dev_mem_fd, self->phys_addr); + if (self->addr1 == MAP_FAILED) + SKIP(return, "Cannot mmap '/dev/mem'\n"); + + /* ... and want to be able to read from them. */ + if (test_read_access(self->addr1, self->size1, self->pagesize)) + SKIP(return, "Cannot read-access mmap'ed '/dev/mem'\n"); + + self->size2 = 0; + self->addr2 = MAP_FAILED; +} + +FIXTURE_TEARDOWN(pfnmap) +{ + if (self->addr2 != MAP_FAILED) + munmap(self->addr2, self->size2); + if (self->addr1 != MAP_FAILED) + munmap(self->addr1, self->size1); + if (self->dev_mem_fd >= 0) + close(self->dev_mem_fd); +} + +TEST_F(pfnmap, madvise_disallowed) +{ + int advices[] = { + MADV_DONTNEED, + MADV_DONTNEED_LOCKED, + MADV_FREE, + MADV_WIPEONFORK, + MADV_COLD, + MADV_PAGEOUT, + MADV_POPULATE_READ, + MADV_POPULATE_WRITE, + }; + int i; + + /* All these advices must be rejected. */ + for (i = 0; i < ARRAY_SIZE(advices); i++) { + EXPECT_LT(madvise(self->addr1, self->pagesize, advices[i]), 0); + EXPECT_EQ(errno, EINVAL); + } +} + +TEST_F(pfnmap, munmap_split) +{ + /* + * Unmap the first page. This munmap() call is not really expected to + * fail, but we might be able to trigger other internal issues. + */ + ASSERT_EQ(munmap(self->addr1, self->pagesize), 0); + + /* + * Remap the first page while the second page is still mapped. This + * makes sure that any PAT tracking on x86 will allow for mmap()'ing + * a page again while some parts of the first mmap() are still + * around. + */ + self->size2 = self->pagesize; + self->addr2 = mmap(NULL, self->pagesize, PROT_READ, MAP_SHARED, + self->dev_mem_fd, self->phys_addr); + ASSERT_NE(self->addr2, MAP_FAILED); +} + +TEST_F(pfnmap, mremap_fixed) +{ + char *ret; + + /* Reserve a destination area. */ + self->size2 = self->size1; + self->addr2 = mmap(NULL, self->size2, PROT_READ, MAP_ANON | MAP_PRIVATE, + -1, 0); + ASSERT_NE(self->addr2, MAP_FAILED); + + /* mremap() over our destination. */ + ret = mremap(self->addr1, self->size1, self->size2, + MREMAP_FIXED | MREMAP_MAYMOVE, self->addr2); + ASSERT_NE(ret, MAP_FAILED); +} + +TEST_F(pfnmap, mremap_shrink) +{ + char *ret; + + /* Shrinking is expected to work. */ + ret = mremap(self->addr1, self->size1, self->size1 - self->pagesize, 0); + ASSERT_NE(ret, MAP_FAILED); +} + +TEST_F(pfnmap, mremap_expand) +{ + /* + * Growing is not expected to work, and getting it right would + * be challenging. So this test primarily serves as an early warning + * that something that probably should never work suddenly works. 
+ */ + self->size2 = self->size1 + self->pagesize; + self->addr2 = mremap(self->addr1, self->size1, self->size2, MREMAP_MAYMOVE); + ASSERT_EQ(self->addr2, MAP_FAILED); +} + +TEST_F(pfnmap, fork) +{ + pid_t pid; + int ret; + + /* fork() a child and test if the child can access the pages. */ + pid = fork(); + ASSERT_GE(pid, 0); + + if (!pid) { + EXPECT_EQ(test_read_access(self->addr1, self->size1, + self->pagesize), 0); + exit(0); + } + + wait(&ret); + if (WIFEXITED(ret)) + ret = WEXITSTATUS(ret); + else + ret = -EINVAL; + ASSERT_EQ(ret, 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/pkey-arm64.h b/tools/testing/selftests/mm/pkey-arm64.h index d9d2100eafc0..8e9685e03c44 100644 --- a/tools/testing/selftests/mm/pkey-arm64.h +++ b/tools/testing/selftests/mm/pkey-arm64.h @@ -30,7 +30,7 @@ #define NR_PKEYS 8 #define NR_RESERVED_PKEYS 1 /* pkey-0 */ -#define PKEY_ALLOW_ALL 0x77777777 +#define PKEY_REG_ALLOW_ALL 0x77777777 #define PKEY_REG_ALLOW_NONE 0x0 #define PKEY_BITS_PER_PKEY 4 @@ -81,11 +81,11 @@ static inline int get_arch_reserved_keys(void) return NR_RESERVED_PKEYS; } -void expect_fault_on_read_execonly_key(void *p1, int pkey) +static inline void expect_fault_on_read_execonly_key(void *p1, int pkey) { } -void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey) +static inline void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey) { return PTR_ERR_ENOTSUP; } diff --git a/tools/testing/selftests/mm/pkey-helpers.h b/tools/testing/selftests/mm/pkey-helpers.h index f7cfe163b0ff..ea404f80e6cb 100644 --- a/tools/testing/selftests/mm/pkey-helpers.h +++ b/tools/testing/selftests/mm/pkey-helpers.h @@ -13,22 +13,23 @@ #include <ucontext.h> #include <sys/mman.h> +#include <linux/mman.h> +#include <linux/types.h> + #include "../kselftest.h" /* Define some kernel-like types */ -#define u8 __u8 -#define u16 __u16 -#define u32 __u32 -#define u64 __u64 +typedef __u8 u8; +typedef __u16 u16; +typedef __u32 u32; +typedef __u64 u64; #define PTR_ERR_ENOTSUP ((void *)-ENOTSUP) #ifndef DEBUG_LEVEL #define DEBUG_LEVEL 0 #endif -#define DPRINT_IN_SIGNAL_BUF_SIZE 4096 extern int dprint_in_signal; -extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; extern int test_nr; extern int iteration_nr; @@ -83,17 +84,18 @@ extern void abort_hooks(void); #ifndef noinline # define noinline __attribute__((noinline)) #endif +#ifndef __maybe_unused +# define __maybe_unused __attribute__((__unused__)) +#endif -noinline int read_ptr(int *ptr) -{ - /* Keep GCC from optimizing this away somehow */ - barrier(); - return *ptr; -} - -void expected_pkey_fault(int pkey); int sys_pkey_alloc(unsigned long flags, unsigned long init_val); int sys_pkey_free(unsigned long pkey); +int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, + unsigned long pkey); + +/* For functions called from protection_keys.c only */ +noinline int read_ptr(int *ptr); +void expected_pkey_fault(int pkey); int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, unsigned long pkey); void record_pkey_malloc(void *ptr, long size, int prot); @@ -171,38 +173,6 @@ static inline void write_pkey_reg(u64 pkey_reg) pkey_reg, __read_pkey_reg()); } -/* - * These are technically racy. since something could - * change PKEY register between the read and the write. 
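The helpers removed here hand-rolled a read-modify-write of the whole pkey register; a sketch of the per-key alternative, assuming glibc's pkey_get()/pkey_set() wrappers are available on the target libc:

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	int pkey = pkey_alloc(0, 0);

	if (pkey < 0) {
		perror("pkey_alloc");
		return 1;
	}
	/* pkey_get()/pkey_set() replace one key's rights wholesale, so the
	 * caller never touches the other keys' bits by hand. */
	printf("rights before: %d\n", pkey_get(pkey));
	pkey_set(pkey, PKEY_DISABLE_WRITE);
	printf("rights after:  %d\n", pkey_get(pkey));
	pkey_free(pkey);
	return 0;
}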
- */ -static inline void __pkey_access_allow(int pkey, int do_allow) -{ - u64 pkey_reg = read_pkey_reg(); - int bit = pkey * 2; - - if (do_allow) - pkey_reg &= (1<<bit); - else - pkey_reg |= (1<<bit); - - dprintf4("pkey_reg now: %016llx\n", read_pkey_reg()); - write_pkey_reg(pkey_reg); -} - -static inline void __pkey_write_allow(int pkey, int do_allow_write) -{ - u64 pkey_reg = read_pkey_reg(); - int bit = pkey * 2 + 1; - - if (do_allow_write) - pkey_reg &= (1<<bit); - else - pkey_reg |= (1<<bit); - - write_pkey_reg(pkey_reg); - dprintf4("pkey_reg now: %016llx\n", read_pkey_reg()); -} - #define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1)) #define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1)) #define ALIGN_PTR_UP(p, ptr_align_to) \ @@ -224,7 +194,7 @@ static inline u32 *siginfo_get_pkey_ptr(siginfo_t *si) static inline int kernel_has_pkeys(void) { /* try allocating a key and see if it succeeds */ - int ret = sys_pkey_alloc(0, 0); + int ret = sys_pkey_alloc(0, PKEY_UNRESTRICTED); if (ret <= 0) { return 0; } diff --git a/tools/testing/selftests/mm/pkey-powerpc.h b/tools/testing/selftests/mm/pkey-powerpc.h index 3d0c0bdae5bc..17bf2d1b0192 100644 --- a/tools/testing/selftests/mm/pkey-powerpc.h +++ b/tools/testing/selftests/mm/pkey-powerpc.h @@ -3,6 +3,8 @@ #ifndef _PKEYS_POWERPC_H #define _PKEYS_POWERPC_H +#include <sys/stat.h> + #ifndef SYS_pkey_alloc # define SYS_pkey_alloc 384 # define SYS_pkey_free 385 @@ -91,7 +93,7 @@ static inline int get_arch_reserved_keys(void) return NR_RESERVED_PKEYS_64K_3KEYS; } -void expect_fault_on_read_execonly_key(void *p1, int pkey) +static inline void expect_fault_on_read_execonly_key(void *p1, int pkey) { /* * powerpc does not allow userspace to change permissions of exec-only @@ -102,10 +104,20 @@ void expect_fault_on_read_execonly_key(void *p1, int pkey) return; } +#define REPEAT_8(s) s s s s s s s s +#define REPEAT_64(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) \ + REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) +#define REPEAT_512(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) \ + REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) +#define REPEAT_4096(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) \ + REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) +#define REPEAT_16384(s) REPEAT_4096(s) REPEAT_4096(s) \ + REPEAT_4096(s) REPEAT_4096(s) + /* 4-byte instructions * 16384 = 64K page */ -#define __page_o_noops() asm(".rept 16384 ; nop; .endr") +#define __page_o_noops() asm(REPEAT_16384("nop\n")) -void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey) +static inline void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey) { void *ptr; int ret; diff --git a/tools/testing/selftests/mm/pkey-x86.h b/tools/testing/selftests/mm/pkey-x86.h index ac91777c8917..f7ecd335df1e 100644 --- a/tools/testing/selftests/mm/pkey-x86.h +++ b/tools/testing/selftests/mm/pkey-x86.h @@ -113,7 +113,7 @@ static inline u32 pkey_bit_position(int pkey) #define XSTATE_PKEY 0x200 #define XSTATE_BV_OFFSET 512 -int pkey_reg_xstate_offset(void) +static inline int pkey_reg_xstate_offset(void) { unsigned int eax; unsigned int ebx; @@ -148,7 +148,7 @@ static inline int get_arch_reserved_keys(void) return NR_RESERVED_PKEYS; } -void expect_fault_on_read_execonly_key(void *p1, int pkey) +static inline void expect_fault_on_read_execonly_key(void *p1, int pkey) { int ptr_contents; @@ -157,7 +157,7 @@ void expect_fault_on_read_execonly_key(void *p1, int pkey) expected_pkey_fault(pkey); } -void 
*malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey) +static inline void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey) { return PTR_ERR_ENOTSUP; } diff --git a/tools/testing/selftests/mm/pkey_sighandler_tests.c b/tools/testing/selftests/mm/pkey_sighandler_tests.c index c593a426341c..b5e076a564c9 100644 --- a/tools/testing/selftests/mm/pkey_sighandler_tests.c +++ b/tools/testing/selftests/mm/pkey_sighandler_tests.c @@ -32,11 +32,9 @@ #define STACK_SIZE PTHREAD_STACK_MIN -void expected_pkey_fault(int pkey) {} - -pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; -pthread_cond_t cond = PTHREAD_COND_INITIALIZER; -siginfo_t siginfo = {0}; +static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t cond = PTHREAD_COND_INITIALIZER; +static siginfo_t siginfo = {0}; /* * We need to use inline assembly instead of glibc's syscall because glibc's @@ -163,7 +161,7 @@ static void *thread_segv_with_pkey0_disabled(void *ptr) __write_pkey_reg(pkey_reg_restrictive_default()); /* Segfault (with SEGV_MAPERR) */ - *(int *) (0x1) = 1; + *(volatile int *)NULL = 1; return NULL; } @@ -179,7 +177,6 @@ static void *thread_segv_pkuerr_stack(void *ptr) static void *thread_segv_maperr_ptr(void *ptr) { stack_t *stack = ptr; - int *bad = (int *)1; u64 pkey_reg; /* @@ -195,7 +192,7 @@ static void *thread_segv_maperr_ptr(void *ptr) __write_pkey_reg(pkey_reg); /* Segfault */ - *bad = 1; + *(volatile int *)NULL = 1; syscall_raw(SYS_exit, 0, 0, 0, 0, 0, 0); return NULL; } @@ -234,7 +231,7 @@ static void test_sigsegv_handler_with_pkey0_disabled(void) ksft_test_result(siginfo.si_signo == SIGSEGV && siginfo.si_code == SEGV_MAPERR && - siginfo.si_addr == (void *)1, + siginfo.si_addr == NULL, "%s\n", __func__); } @@ -314,11 +311,11 @@ static void test_sigsegv_handler_with_different_pkey_for_stack(void) __write_pkey_reg(pkey_reg); /* Protect the new stack with MPK 1 */ - pkey = pkey_alloc(0, 0); - pkey_mprotect(stack, STACK_SIZE, PROT_READ | PROT_WRITE, pkey); + pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED); + sys_mprotect_pkey(stack, STACK_SIZE, PROT_READ | PROT_WRITE, pkey); /* Set up alternate signal stack that will use the default MPK */ - sigstack.ss_sp = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, + sigstack.ss_sp = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); sigstack.ss_flags = 0; sigstack.ss_size = STACK_SIZE; @@ -349,7 +346,7 @@ static void test_sigsegv_handler_with_different_pkey_for_stack(void) ksft_test_result(siginfo.si_signo == SIGSEGV && siginfo.si_code == SEGV_MAPERR && - siginfo.si_addr == (void *)1, + siginfo.si_addr == NULL, "%s\n", __func__); } @@ -487,11 +484,11 @@ static void test_pkru_sigreturn(void) __write_pkey_reg(pkey_reg); /* Protect the stack with MPK 2 */ - pkey = pkey_alloc(0, 0); - pkey_mprotect(stack, STACK_SIZE, PROT_READ | PROT_WRITE, pkey); + pkey = sys_pkey_alloc(0, PKEY_UNRESTRICTED); + sys_mprotect_pkey(stack, STACK_SIZE, PROT_READ | PROT_WRITE, pkey); /* Set up alternate signal stack that will use the default MPK */ - sigstack.ss_sp = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, + sigstack.ss_sp = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); sigstack.ss_flags = 0; sigstack.ss_size = STACK_SIZE; @@ -538,6 +535,9 @@ int main(int argc, char *argv[]) ksft_print_header(); ksft_set_plan(ARRAY_SIZE(pkey_tests)); + if (!is_pkeys_supported()) + ksft_exit_skip("pkeys not supported\n"); + for (i = 0; i < ARRAY_SIZE(pkey_tests); i++) (*pkey_tests[i])(); diff 
--git a/tools/testing/selftests/mm/pkey_util.c b/tools/testing/selftests/mm/pkey_util.c new file mode 100644 index 000000000000..255b332f7a08 --- /dev/null +++ b/tools/testing/selftests/mm/pkey_util.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0-only +#define __SANE_USERSPACE_TYPES__ +#include <sys/syscall.h> +#include <unistd.h> + +#include "pkey-helpers.h" + +int sys_pkey_alloc(unsigned long flags, unsigned long init_val) +{ + int ret = syscall(SYS_pkey_alloc, flags, init_val); + dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n", + __func__, flags, init_val, ret, errno); + return ret; +} + +int sys_pkey_free(unsigned long pkey) +{ + int ret = syscall(SYS_pkey_free, pkey); + dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret); + return ret; +} + +int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, + unsigned long pkey) +{ + int sret; + + dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__, + ptr, size, orig_prot, pkey); + + errno = 0; + sret = syscall(__NR_pkey_mprotect, ptr, size, orig_prot, pkey); + if (errno) { + dprintf2("SYS_mprotect_key sret: %d\n", sret); + dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot); + dprintf2("SYS_mprotect_key failed, errno: %d\n", errno); + if (DEBUG_LEVEL >= 2) + perror("SYS_mprotect_pkey"); + } + return sret; +} diff --git a/tools/testing/selftests/mm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c index 4fcecfb7b189..23ebec367015 100644 --- a/tools/testing/selftests/mm/protection_keys.c +++ b/tools/testing/selftests/mm/protection_keys.c @@ -53,9 +53,15 @@ int test_nr; u64 shadow_pkey_reg; int dprint_in_signal; -char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE]; -void cat_into_file(char *str, char *file) +noinline int read_ptr(int *ptr) +{ + /* Keep GCC from optimizing this away somehow */ + barrier(); + return *ptr; +} + +static void cat_into_file(char *str, char *file) { int fd = open(file, O_RDWR); int ret; @@ -82,7 +88,7 @@ void cat_into_file(char *str, char *file) #if CONTROL_TRACING > 0 static int warned_tracing; -int tracing_root_ok(void) +static int tracing_root_ok(void) { if (geteuid() != 0) { if (!warned_tracing) @@ -95,7 +101,7 @@ int tracing_root_ok(void) } #endif -void tracing_on(void) +static void tracing_on(void) { #if CONTROL_TRACING > 0 #define TRACEDIR "/sys/kernel/tracing" @@ -119,7 +125,7 @@ void tracing_on(void) #endif } -void tracing_off(void) +static void tracing_off(void) { #if CONTROL_TRACING > 0 if (!tracing_root_ok()) @@ -153,7 +159,7 @@ __attribute__((__aligned__(65536))) #else __attribute__((__aligned__(PAGE_SIZE))) #endif -void lots_o_noops_around_write(int *write_to_me) +static void lots_o_noops_around_write(int *write_to_me) { dprintf3("running %s()\n", __func__); __page_o_noops(); @@ -164,7 +170,7 @@ void lots_o_noops_around_write(int *write_to_me) dprintf3("%s() done\n", __func__); } -void dump_mem(void *dumpme, int len_bytes) +static void dump_mem(void *dumpme, int len_bytes) { char *c = (void *)dumpme; int i; @@ -207,7 +213,7 @@ static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags) return 0; } -void pkey_disable_set(int pkey, int flags) +static void pkey_disable_set(int pkey, int flags) { unsigned long syscall_flags = 0; int ret; @@ -245,7 +251,7 @@ void pkey_disable_set(int pkey, int flags) pkey, flags); } -void pkey_disable_clear(int pkey, int flags) +static void pkey_disable_clear(int pkey, int flags) { unsigned long syscall_flags = 0; int ret; @@ -271,19 +277,19 @@ void pkey_disable_clear(int pkey, int 
flags) pkey, read_pkey_reg()); } -void pkey_write_allow(int pkey) +__maybe_unused static void pkey_write_allow(int pkey) { pkey_disable_clear(pkey, PKEY_DISABLE_WRITE); } -void pkey_write_deny(int pkey) +__maybe_unused static void pkey_write_deny(int pkey) { pkey_disable_set(pkey, PKEY_DISABLE_WRITE); } -void pkey_access_allow(int pkey) +__maybe_unused static void pkey_access_allow(int pkey) { pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS); } -void pkey_access_deny(int pkey) +__maybe_unused static void pkey_access_deny(int pkey) { pkey_disable_set(pkey, PKEY_DISABLE_ACCESS); } @@ -301,9 +307,9 @@ static char *si_code_str(int si_code) return "UNKNOWN"; } -int pkey_faults; -int last_si_pkey = -1; -void signal_handler(int signum, siginfo_t *si, void *vucontext) +static int pkey_faults; +static int last_si_pkey = -1; +static void signal_handler(int signum, siginfo_t *si, void *vucontext) { ucontext_t *uctxt = vucontext; int trapno; @@ -390,27 +396,21 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) /* restore access and let the faulting instruction continue */ pkey_access_allow(siginfo_pkey); #elif defined(__aarch64__) - aarch64_write_signal_pkey(uctxt, PKEY_ALLOW_ALL); + aarch64_write_signal_pkey(uctxt, PKEY_REG_ALLOW_ALL); #endif /* arch */ pkey_faults++; dprintf1("<<<<==================================================\n"); dprint_in_signal = 0; } -int wait_all_children(void) -{ - int status; - return waitpid(-1, &status, 0); -} - -void sig_chld(int x) +static void sig_chld(int x) { dprint_in_signal = 1; dprintf2("[%d] SIGCHLD: %d\n", getpid(), x); dprint_in_signal = 0; } -void setup_sigsegv_handler(void) +static void setup_sigsegv_handler(void) { int r, rs; struct sigaction newact; @@ -436,13 +436,13 @@ void setup_sigsegv_handler(void) pkey_assert(r == 0); } -void setup_handlers(void) +static void setup_handlers(void) { signal(SIGCHLD, &sig_chld); setup_sigsegv_handler(); } -pid_t fork_lazy_child(void) +static pid_t fork_lazy_child(void) { pid_t forkret; @@ -460,38 +460,10 @@ pid_t fork_lazy_child(void) return forkret; } -int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, - unsigned long pkey) -{ - int sret; - - dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__, - ptr, size, orig_prot, pkey); - - errno = 0; - sret = syscall(__NR_pkey_mprotect, ptr, size, orig_prot, pkey); - if (errno) { - dprintf2("SYS_mprotect_key sret: %d\n", sret); - dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot); - dprintf2("SYS_mprotect_key failed, errno: %d\n", errno); - if (DEBUG_LEVEL >= 2) - perror("SYS_mprotect_pkey"); - } - return sret; -} - -int sys_pkey_alloc(unsigned long flags, unsigned long init_val) -{ - int ret = syscall(SYS_pkey_alloc, flags, init_val); - dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n", - __func__, flags, init_val, ret, errno); - return ret; -} - -int alloc_pkey(void) +static int alloc_pkey(void) { int ret; - unsigned long init_val = 0x0; + unsigned long init_val = PKEY_UNRESTRICTED; dprintf1("%s()::%d, pkey_reg: 0x%016llx shadow: %016llx\n", __func__, __LINE__, __read_pkey_reg(), shadow_pkey_reg); @@ -534,19 +506,12 @@ int alloc_pkey(void) return ret; } -int sys_pkey_free(unsigned long pkey) -{ - int ret = syscall(SYS_pkey_free, pkey); - dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret); - return ret; -} - /* * I had a bug where pkey bits could be set by mprotect() but * not cleared. This ensures we get lots of random bit sets * and clears on the vma and pte pkey bits. 
*/ -int alloc_random_pkey(void) +static int alloc_random_pkey(void) { int max_nr_pkey_allocs; int ret; @@ -629,7 +594,7 @@ struct pkey_malloc_record { }; struct pkey_malloc_record *pkey_malloc_records; struct pkey_malloc_record *pkey_last_malloc_record; -long nr_pkey_malloc_records; +static long nr_pkey_malloc_records; void record_pkey_malloc(void *ptr, long size, int prot) { long i; @@ -667,7 +632,7 @@ void record_pkey_malloc(void *ptr, long size, int prot) nr_pkey_malloc_records++; } -void free_pkey_malloc(void *ptr) +static void free_pkey_malloc(void *ptr) { long i; int ret; @@ -694,8 +659,7 @@ void free_pkey_malloc(void *ptr) pkey_assert(false); } - -void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey) +static void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey) { void *ptr; int ret; @@ -715,7 +679,7 @@ void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey) return ptr; } -void *malloc_pkey_anon_huge(long size, int prot, u16 pkey) +static void *malloc_pkey_anon_huge(long size, int prot, u16 pkey) { int ret; void *ptr; @@ -745,10 +709,10 @@ void *malloc_pkey_anon_huge(long size, int prot, u16 pkey) return ptr; } -int hugetlb_setup_ok; +static int hugetlb_setup_ok; #define SYSFS_FMT_NR_HUGE_PAGES "/sys/kernel/mm/hugepages/hugepages-%ldkB/nr_hugepages" #define GET_NR_HUGE_PAGES 10 -void setup_hugetlbfs(void) +static void setup_hugetlbfs(void) { int err; int fd; @@ -796,7 +760,7 @@ void setup_hugetlbfs(void) hugetlb_setup_ok = 1; } -void *malloc_pkey_hugetlb(long size, int prot, u16 pkey) +static void *malloc_pkey_hugetlb(long size, int prot, u16 pkey) { void *ptr; int flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB; @@ -817,42 +781,15 @@ void *malloc_pkey_hugetlb(long size, int prot, u16 pkey) return ptr; } -void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey) -{ - void *ptr; - int fd; - - dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__, - size, prot, pkey); - pkey_assert(pkey < NR_PKEYS); - fd = open("/dax/foo", O_RDWR); - pkey_assert(fd >= 0); - - ptr = mmap(0, size, prot, MAP_SHARED, fd, 0); - pkey_assert(ptr != (void *)-1); - - mprotect_pkey(ptr, size, prot, pkey); - - record_pkey_malloc(ptr, size, prot); - - dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr); - close(fd); - return ptr; -} - -void *(*pkey_malloc[])(long size, int prot, u16 pkey) = { +static void *(*pkey_malloc[])(long size, int prot, u16 pkey) = { malloc_pkey_with_mprotect, malloc_pkey_with_mprotect_subpage, malloc_pkey_anon_huge, malloc_pkey_hugetlb -/* can not do direct with the pkey_mprotect() API: - malloc_pkey_mmap_direct, - malloc_pkey_mmap_dax, -*/ }; -void *malloc_pkey(long size, int prot, u16 pkey) +static void *malloc_pkey(long size, int prot, u16 pkey) { void *ret; static int malloc_type; @@ -882,7 +819,7 @@ void *malloc_pkey(long size, int prot, u16 pkey) return ret; } -int last_pkey_faults; +static int last_pkey_faults; #define UNKNOWN_PKEY -2 void expected_pkey_fault(int pkey) { @@ -905,7 +842,7 @@ void expected_pkey_fault(int pkey) */ if (__read_pkey_reg() != 0) #elif defined(__aarch64__) - if (__read_pkey_reg() != PKEY_ALLOW_ALL) + if (__read_pkey_reg() != PKEY_REG_ALLOW_ALL) #else if (__read_pkey_reg() != shadow_pkey_reg) #endif /* arch */ @@ -924,9 +861,9 @@ void expected_pkey_fault(int pkey) pkey_assert(last_pkey_faults == pkey_faults); \ } while (0) -int test_fds[10] = { -1 }; -int nr_test_fds; -void __save_test_fd(int fd) +static int test_fds[10] = { -1 }; +static int nr_test_fds; +static void __save_test_fd(int fd) { pkey_assert(fd >= 0); 
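The test-fd plumbing here feeds read(2) calls that target pkey-protected buffers; a standalone sketch of that scenario (the kernel's copy-out honors the calling thread's pkey rights, so the selftest expects the read to fail once access is denied):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/etc/passwd", O_RDONLY);
	size_t len = getpagesize();
	void *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	int pkey = pkey_alloc(0, 0);

	if (fd < 0 || buf == MAP_FAILED || pkey < 0)
		return 1;
	pkey_mprotect(buf, len, PROT_READ | PROT_WRITE, pkey);
	pkey_set(pkey, PKEY_DISABLE_ACCESS);
	/* Kernel copy to user space is subject to the pkey, so this read
	 * is expected to return -1 (the selftest asserts a nonzero return). */
	printf("read() returned %zd\n", read(fd, buf, 1));
	return 0;
}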
pkey_assert(nr_test_fds < ARRAY_SIZE(test_fds)); @@ -934,14 +871,14 @@ void __save_test_fd(int fd) nr_test_fds++; } -int get_test_read_fd(void) +static int get_test_read_fd(void) { int test_fd = open("/etc/passwd", O_RDONLY); __save_test_fd(test_fd); return test_fd; } -void close_test_fds(void) +static void close_test_fds(void) { int i; @@ -954,7 +891,7 @@ void close_test_fds(void) nr_test_fds = 0; } -void test_pkey_alloc_free_attach_pkey0(int *ptr, u16 pkey) +static void test_pkey_alloc_free_attach_pkey0(int *ptr, u16 pkey) { int i, err; int max_nr_pkey_allocs; @@ -1006,7 +943,7 @@ void test_pkey_alloc_free_attach_pkey0(int *ptr, u16 pkey) pkey_assert(!err); } -void test_read_of_write_disabled_region(int *ptr, u16 pkey) +static void test_read_of_write_disabled_region(int *ptr, u16 pkey) { int ptr_contents; @@ -1016,7 +953,7 @@ void test_read_of_write_disabled_region(int *ptr, u16 pkey) dprintf1("*ptr: %d\n", ptr_contents); dprintf1("\n"); } -void test_read_of_access_disabled_region(int *ptr, u16 pkey) +static void test_read_of_access_disabled_region(int *ptr, u16 pkey) { int ptr_contents; @@ -1028,7 +965,7 @@ void test_read_of_access_disabled_region(int *ptr, u16 pkey) expected_pkey_fault(pkey); } -void test_read_of_access_disabled_region_with_page_already_mapped(int *ptr, +static void test_read_of_access_disabled_region_with_page_already_mapped(int *ptr, u16 pkey) { int ptr_contents; @@ -1045,7 +982,7 @@ void test_read_of_access_disabled_region_with_page_already_mapped(int *ptr, expected_pkey_fault(pkey); } -void test_write_of_write_disabled_region_with_page_already_mapped(int *ptr, +static void test_write_of_write_disabled_region_with_page_already_mapped(int *ptr, u16 pkey) { *ptr = __LINE__; @@ -1056,14 +993,14 @@ void test_write_of_write_disabled_region_with_page_already_mapped(int *ptr, expected_pkey_fault(pkey); } -void test_write_of_write_disabled_region(int *ptr, u16 pkey) +static void test_write_of_write_disabled_region(int *ptr, u16 pkey) { dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey); pkey_write_deny(pkey); *ptr = __LINE__; expected_pkey_fault(pkey); } -void test_write_of_access_disabled_region(int *ptr, u16 pkey) +static void test_write_of_access_disabled_region(int *ptr, u16 pkey) { dprintf1("disabling access to PKEY[%02d], doing write\n", pkey); pkey_access_deny(pkey); @@ -1071,7 +1008,7 @@ void test_write_of_access_disabled_region(int *ptr, u16 pkey) expected_pkey_fault(pkey); } -void test_write_of_access_disabled_region_with_page_already_mapped(int *ptr, +static void test_write_of_access_disabled_region_with_page_already_mapped(int *ptr, u16 pkey) { *ptr = __LINE__; @@ -1082,7 +1019,7 @@ void test_write_of_access_disabled_region_with_page_already_mapped(int *ptr, expected_pkey_fault(pkey); } -void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey) +static void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey) { int ret; int test_fd = get_test_read_fd(); @@ -1094,7 +1031,8 @@ void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey) dprintf1("read ret: %d\n", ret); pkey_assert(ret); } -void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey) + +static void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey) { int ret; int test_fd = get_test_read_fd(); @@ -1107,7 +1045,7 @@ void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey) pkey_assert(ret); } -void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey) +static void test_kernel_gup_of_access_disabled_region(int *ptr, 
u16 pkey) { int pipe_ret, vmsplice_ret; struct iovec iov; @@ -1129,7 +1067,7 @@ void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey) close(pipe_fds[1]); } -void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey) +static void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey) { int ignored = 0xdada; int futex_ret; @@ -1147,7 +1085,7 @@ void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey) } /* Assumes that all pkeys other than 'pkey' are unallocated */ -void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey) +static void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey) { int err; int i; @@ -1170,7 +1108,7 @@ void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey) } /* Assumes that all pkeys other than 'pkey' are unallocated */ -void test_pkey_syscalls_bad_args(int *ptr, u16 pkey) +static void test_pkey_syscalls_bad_args(int *ptr, u16 pkey) { int err; int bad_pkey = NR_PKEYS+99; @@ -1180,7 +1118,7 @@ void test_pkey_syscalls_bad_args(int *ptr, u16 pkey) pkey_assert(err); } -void become_child(void) +static void become_child(void) { pid_t forkret; @@ -1196,7 +1134,7 @@ void become_child(void) } /* Assumes that all pkeys other than 'pkey' are unallocated */ -void test_pkey_alloc_exhaust(int *ptr, u16 pkey) +static void test_pkey_alloc_exhaust(int *ptr, u16 pkey) { int err; int allocated_pkeys[NR_PKEYS] = {0}; @@ -1263,7 +1201,7 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) } } -void arch_force_pkey_reg_init(void) +static void arch_force_pkey_reg_init(void) { #if defined(__i386__) || defined(__x86_64__) /* arch */ u64 *buf; @@ -1302,7 +1240,7 @@ void arch_force_pkey_reg_init(void) * a long-running test that continually checks the pkey * register. */ -void test_pkey_init_state(int *ptr, u16 pkey) +static void test_pkey_init_state(int *ptr, u16 pkey) { int err; int allocated_pkeys[NR_PKEYS] = {0}; @@ -1340,7 +1278,7 @@ void test_pkey_init_state(int *ptr, u16 pkey) * have to call pkey_alloc() to use it first. Make sure that it * is usable. 
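A sketch of the pkey-0 property the comment describes: key 0 is the implicit default for every mapping and may be passed to pkey_mprotect() without a prior pkey_alloc(), behaving like a plain mprotect():

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	size_t len = getpagesize();
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED)
		return 1;
	/* Attaching the default key explicitly must just work. */
	if (pkey_mprotect(p, len, PROT_READ, 0))
		perror("pkey_mprotect(pkey=0)");
	return 0;
}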
*/ -void test_mprotect_with_pkey_0(int *ptr, u16 pkey) +static void test_mprotect_with_pkey_0(int *ptr, u16 pkey) { long size; int prot; @@ -1364,7 +1302,7 @@ void test_mprotect_with_pkey_0(int *ptr, u16 pkey) mprotect_pkey(ptr, size, prot, pkey); } -void test_ptrace_of_child(int *ptr, u16 pkey) +static void test_ptrace_of_child(int *ptr, u16 pkey) { __attribute__((__unused__)) int peek_result; pid_t child_pid; @@ -1440,7 +1378,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey) free(plain_ptr_unaligned); } -void *get_pointer_to_instructions(void) +static void *get_pointer_to_instructions(void) { void *p1; @@ -1461,7 +1399,7 @@ void *get_pointer_to_instructions(void) return p1; } -void test_executing_on_unreadable_memory(int *ptr, u16 pkey) +static void test_executing_on_unreadable_memory(int *ptr, u16 pkey) { void *p1; int scratch; @@ -1493,7 +1431,7 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey) pkey_assert(!ret); } -void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) +static void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) { void *p1; int scratch; @@ -1542,7 +1480,7 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) } #if defined(__i386__) || defined(__x86_64__) -void test_ptrace_modifies_pkru(int *ptr, u16 pkey) +static void test_ptrace_modifies_pkru(int *ptr, u16 pkey) { u32 new_pkru; pid_t child; @@ -1665,7 +1603,7 @@ void test_ptrace_modifies_pkru(int *ptr, u16 pkey) #endif #if defined(__aarch64__) -void test_ptrace_modifies_pkru(int *ptr, u16 pkey) +static void test_ptrace_modifies_pkru(int *ptr, u16 pkey) { pid_t child; int status, ret; @@ -1742,7 +1680,7 @@ void test_ptrace_modifies_pkru(int *ptr, u16 pkey) } #endif -void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) +static void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) { int size = PAGE_SIZE; int sret; @@ -1756,7 +1694,7 @@ void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) pkey_assert(sret < 0); } -void (*pkey_tests[])(int *ptr, u16 pkey) = { +static void (*pkey_tests[])(int *ptr, u16 pkey) = { test_read_of_write_disabled_region, test_read_of_access_disabled_region, test_read_of_access_disabled_region_with_page_already_mapped, @@ -1782,7 +1720,7 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = { #endif }; -void run_tests_once(void) +static void run_tests_once(void) { int *ptr; int prot = PROT_READ|PROT_WRITE; @@ -1816,7 +1754,7 @@ void run_tests_once(void) iteration_nr++; } -void pkey_setup_shadow(void) +static void pkey_setup_shadow(void) { shadow_pkey_reg = __read_pkey_reg(); } diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 2fc290d9430c..dddd1dd8af14 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -45,6 +45,8 @@ separated by spaces: vmalloc smoke tests - hmm hmm smoke tests +- madv_guard + test madvise(2) MADV_GUARD_INSTALL and MADV_GUARD_REMOVE options - madv_populate test memadvise(2) MADV_POPULATE_{READ,WRITE} options - memfd_secret @@ -61,6 +63,8 @@ separated by spaces: test soft dirty page bit semantics - pagemap test pagemap_scan IOCTL +- pfnmap + tests for VM_PFNMAP handling - cow test copy-on-write semantics - thp @@ -77,6 +81,8 @@ separated by spaces: test prctl(PR_SET_MDWE, ...) 
- page_frag test handling of page fragment allocation and freeing +- vma_merge + test VMA merge cases behave as expected example: ./run_vmtests.sh -t "hmm mmap ksm" EOF @@ -185,9 +191,10 @@ if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then printf "Not enough huge pages available (%d < %d)\n" \ "$freepgs" "$needpgs" fi + HAVE_HUGEPAGES=1 else echo "no hugetlbfs support in kernel?" - exit 1 + HAVE_HUGEPAGES=0 fi # filter 64bit architectures @@ -216,13 +223,20 @@ pretty_name() { # Usage: run_test [test binary] [arbitrary test arguments...] run_test() { if test_selected ${CATEGORY}; then + local skip=0 + # On memory constrainted systems some tests can fail to allocate hugepages. # perform some cleanup before the test for a higher success rate. - if [ ${CATEGORY} == "thp" ] | [ ${CATEGORY} == "hugetlb" ]; then - echo 3 > /proc/sys/vm/drop_caches - sleep 2 - echo 1 > /proc/sys/vm/compact_memory - sleep 2 + if [ ${CATEGORY} == "thp" -o ${CATEGORY} == "hugetlb" ]; then + if [ "${HAVE_HUGEPAGES}" = "1" ]; then + echo 3 > /proc/sys/vm/drop_caches + sleep 2 + echo 1 > /proc/sys/vm/compact_memory + sleep 2 + else + echo "hugepages not supported" | tap_prefix + skip=1 + fi fi local test=$(pretty_name "$*") @@ -230,8 +244,12 @@ run_test() { local sep=$(echo -n "$title" | tr "[:graph:][:space:]" -) printf "%s\n%s\n%s\n" "$sep" "$title" "$sep" | tap_prefix - ("$@" 2>&1) | tap_prefix - local ret=${PIPESTATUS[0]} + if [ "${skip}" != "1" ]; then + ("$@" 2>&1) | tap_prefix + local ret=${PIPESTATUS[0]} + else + local ret=$ksft_skip + fi count_total=$(( count_total + 1 )) if [ $ret -eq 0 ]; then count_pass=$(( count_pass + 1 )) @@ -269,13 +287,15 @@ CATEGORY="hugetlb" run_test ./hugepage-vmemmap CATEGORY="hugetlb" run_test ./hugetlb-madvise CATEGORY="hugetlb" run_test ./hugetlb_dio -nr_hugepages_tmp=$(cat /proc/sys/vm/nr_hugepages) -# For this test, we need one and just one huge page -echo 1 > /proc/sys/vm/nr_hugepages -CATEGORY="hugetlb" run_test ./hugetlb_fault_after_madv -CATEGORY="hugetlb" run_test ./hugetlb_madv_vs_map -# Restore the previous number of huge pages, since further tests rely on it -echo "$nr_hugepages_tmp" > /proc/sys/vm/nr_hugepages +if [ "${HAVE_HUGEPAGES}" = "1" ]; then + nr_hugepages_tmp=$(cat /proc/sys/vm/nr_hugepages) + # For this test, we need one and just one huge page + echo 1 > /proc/sys/vm/nr_hugepages + CATEGORY="hugetlb" run_test ./hugetlb_fault_after_madv + CATEGORY="hugetlb" run_test ./hugetlb_madv_vs_map + # Restore the previous number of huge pages, since further tests rely on it + echo "$nr_hugepages_tmp" > /proc/sys/vm/nr_hugepages +fi if test_selected "hugetlb"; then echo "NOTE: These hugetlb tests provide minimal coverage. Use" | tap_prefix @@ -302,18 +322,42 @@ uffd_stress_bin=./uffd-stress CATEGORY="userfaultfd" run_test ${uffd_stress_bin} anon 20 16 # Hugetlb tests require source and destination huge pages. Pass in half # the size of the free pages we have, which is used for *each*. -half_ufd_size_MB=$((freepgs / 2)) +# uffd-stress expects a region expressed in MiB, so we adjust +# half_ufd_size_MB accordingly. +half_ufd_size_MB=$(((freepgs * hpgsize_KB) / 1024 / 2)) CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb "$half_ufd_size_MB" 32 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb-private "$half_ufd_size_MB" 32 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem 20 16 CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem-private 20 16 +# uffd-wp-mremap requires at least one page of each size. 
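The half_ufd_size_MB change above converts free huge pages to MiB before halving; a quick shell check with illustrative values only:

# Illustrative values, not taken from a real system:
freepgs=64; hpgsize_KB=2048
half_ufd_size_MB=$(((freepgs * hpgsize_KB) / 1024 / 2))
echo "${half_ufd_size_MB} MiB per side"   # prints 64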
+have_all_size_hugepgs=true +declare -A nr_size_hugepgs +for f in /sys/kernel/mm/hugepages/**/nr_hugepages; do + old=$(cat $f) + nr_size_hugepgs["$f"]="$old" + if [ "$old" == 0 ]; then + echo 1 > "$f" + fi + if [ $(cat "$f") == 0 ]; then + have_all_size_hugepgs=false + break + fi +done +if $have_all_size_hugepgs; then + CATEGORY="userfaultfd" run_test ./uffd-wp-mremap +else + echo "# SKIP ./uffd-wp-mremap" +fi #cleanup +for f in "${!nr_size_hugepgs[@]}"; do + echo "${nr_size_hugepgs["$f"]}" > "$f" +done echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages CATEGORY="compaction" run_test ./compaction_test -if command -v sudo &> /dev/null; +if command -v sudo &> /dev/null && sudo -u nobody ls ./on-fault-limit >/dev/null; then CATEGORY="mlock" run_test sudo -u nobody ./on-fault-limit else @@ -375,17 +419,24 @@ CATEGORY="mremap" run_test ./mremap_dontunmap CATEGORY="hmm" run_test bash ./test_hmm.sh smoke +# MADV_GUARD_INSTALL and MADV_GUARD_REMOVE tests +CATEGORY="madv_guard" run_test ./guard-regions + # MADV_POPULATE_READ and MADV_POPULATE_WRITE tests CATEGORY="madv_populate" run_test ./madv_populate +CATEGORY="vma_merge" run_test ./merge + if [ -x ./memfd_secret ] then -(echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix +(echo 0 > /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix CATEGORY="memfd_secret" run_test ./memfd_secret fi # KSM KSM_MERGE_TIME_HUGE_PAGES test with size of 100 -CATEGORY="ksm" run_test ./ksm_tests -H -s 100 +if [ "${HAVE_HUGEPAGES}" = "1" ]; then + CATEGORY="ksm" run_test ./ksm_tests -H -s 100 +fi # KSM KSM_MERGE_TIME test with size of 100 CATEGORY="ksm" run_test ./ksm_tests -P -s 100 # KSM MADV_MERGEABLE test with 10 identical pages @@ -423,6 +474,8 @@ fi CATEGORY="pagemap" run_test ./pagemap_ioctl +CATEGORY="pfnmap" run_test ./pfnmap + # COW tests CATEGORY="cow" run_test ./cow @@ -434,15 +487,17 @@ CATEGORY="thp" run_test ./transhuge-stress -d 20 # Try to create XFS if not provided if [ -z "${SPLIT_HUGE_PAGE_TEST_XFS_PATH}" ]; then - if test_selected "thp"; then - if grep xfs /proc/filesystems &>/dev/null; then - XFS_IMG=$(mktemp /tmp/xfs_img_XXXXXX) - SPLIT_HUGE_PAGE_TEST_XFS_PATH=$(mktemp -d /tmp/xfs_dir_XXXXXX) - truncate -s 314572800 ${XFS_IMG} - mkfs.xfs -q ${XFS_IMG} - mount -o loop ${XFS_IMG} ${SPLIT_HUGE_PAGE_TEST_XFS_PATH} - MOUNTED_XFS=1 - fi + if [ "${HAVE_HUGEPAGES}" = "1" ]; then + if test_selected "thp"; then + if grep xfs /proc/filesystems &>/dev/null; then + XFS_IMG=$(mktemp /tmp/xfs_img_XXXXXX) + SPLIT_HUGE_PAGE_TEST_XFS_PATH=$(mktemp -d /tmp/xfs_dir_XXXXXX) + truncate -s 314572800 ${XFS_IMG} + mkfs.xfs -q ${XFS_IMG} + mount -o loop ${XFS_IMG} ${SPLIT_HUGE_PAGE_TEST_XFS_PATH} + MOUNTED_XFS=1 + fi + fi fi fi diff --git a/tools/testing/selftests/mm/seal_elf.c b/tools/testing/selftests/mm/seal_elf.c deleted file mode 100644 index d9f8ba8d5050..000000000000 --- a/tools/testing/selftests/mm/seal_elf.c +++ /dev/null @@ -1,137 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#define _GNU_SOURCE -#include <sys/mman.h> -#include <stdint.h> -#include <asm-generic/unistd.h> -#include <string.h> -#include <sys/time.h> -#include <sys/resource.h> -#include <stdbool.h> -#include "../kselftest.h" -#include <syscall.h> -#include <errno.h> -#include <stdio.h> -#include <stdlib.h> -#include <fcntl.h> -#include <sys/ioctl.h> -#include <sys/vfs.h> -#include <sys/stat.h> -#include "mseal_helpers.h" - -/* - * define sys_xyx to call syscall directly. 
- */ -static int sys_mseal(void *start, size_t len) -{ - int sret; - - errno = 0; - sret = syscall(__NR_mseal, start, len, 0); - return sret; -} - -static inline int sys_mprotect(void *ptr, size_t size, unsigned long prot) -{ - int sret; - - errno = 0; - sret = syscall(__NR_mprotect, ptr, size, prot); - return sret; -} - -static bool seal_support(void) -{ - int ret; - void *ptr; - unsigned long page_size = getpagesize(); - - ptr = mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); - if (ptr == (void *) -1) - return false; - - ret = sys_mseal(ptr, page_size); - if (ret < 0) - return false; - - return true; -} - -const char somestr[4096] = {"READONLY"}; - -static void test_seal_elf(void) -{ - int ret; - FILE *maps; - char line[512]; - uintptr_t addr_start, addr_end; - char prot[5]; - char filename[256]; - unsigned long page_size = getpagesize(); - unsigned long long ptr = (unsigned long long) somestr; - char *somestr2 = (char *)somestr; - - /* - * Modify the protection of readonly somestr - */ - if (((unsigned long long)ptr % page_size) != 0) - ptr = (unsigned long long)ptr & ~(page_size - 1); - - ksft_print_msg("somestr = %s\n", somestr); - ksft_print_msg("change protection to rw\n"); - ret = sys_mprotect((void *)ptr, page_size, PROT_READ|PROT_WRITE); - FAIL_TEST_IF_FALSE(!ret); - *somestr2 = 'A'; - ksft_print_msg("somestr is modified to: %s\n", somestr); - ret = sys_mprotect((void *)ptr, page_size, PROT_READ); - FAIL_TEST_IF_FALSE(!ret); - - maps = fopen("/proc/self/maps", "r"); - FAIL_TEST_IF_FALSE(maps); - - /* - * apply sealing to elf binary - */ - while (fgets(line, sizeof(line), maps)) { - if (sscanf(line, "%lx-%lx %4s %*x %*x:%*x %*u %255[^\n]", - &addr_start, &addr_end, prot, filename) == 4) { - if (strlen(filename)) { - /* - * seal the mapping if read only. 
- */ - if (strstr(prot, "r-")) { - ret = sys_mseal((void *)addr_start, addr_end - addr_start); - FAIL_TEST_IF_FALSE(!ret); - ksft_print_msg("sealed: %lx-%lx %s %s\n", - addr_start, addr_end, prot, filename); - if ((uintptr_t) somestr >= addr_start && - (uintptr_t) somestr <= addr_end) - ksft_print_msg("mapping for somestr found\n"); - } - } - } - } - fclose(maps); - - ret = sys_mprotect((void *)ptr, page_size, PROT_READ | PROT_WRITE); - FAIL_TEST_IF_FALSE(ret < 0); - ksft_print_msg("somestr is sealed, mprotect is rejected\n"); - - REPORT_TEST_PASS(); -} - -int main(int argc, char **argv) -{ - bool test_seal = seal_support(); - - ksft_print_header(); - ksft_print_msg("pid=%d\n", getpid()); - - if (!test_seal) - ksft_exit_skip("sealing not supported, check CONFIG_64BIT\n"); - - ksft_set_plan(1); - - test_seal_elf(); - - ksft_finished(); -} diff --git a/tools/testing/selftests/mm/settings b/tools/testing/selftests/mm/settings index a953c96aa16e..e2206265f67c 100644 --- a/tools/testing/selftests/mm/settings +++ b/tools/testing/selftests/mm/settings @@ -1 +1 @@ -timeout=180 +timeout=900 diff --git a/tools/testing/selftests/mm/soft-dirty.c b/tools/testing/selftests/mm/soft-dirty.c index bdfa5d085f00..8e1462ce0532 100644 --- a/tools/testing/selftests/mm/soft-dirty.c +++ b/tools/testing/selftests/mm/soft-dirty.c @@ -128,7 +128,7 @@ static void test_mprotect(int pagemap_fd, int pagesize, bool anon) { const char *type[] = {"file", "anon"}; const char *fname = "./soft-dirty-test-file"; - int test_fd; + int test_fd = 0; char *map; if (anon) { diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c index eb6d1b9fc362..f0d9c035641d 100644 --- a/tools/testing/selftests/mm/split_huge_page_test.c +++ b/tools/testing/selftests/mm/split_huge_page_test.c @@ -5,6 +5,7 @@ */ #define _GNU_SOURCE +#include <assert.h> #include <stdio.h> #include <stdlib.h> #include <stdarg.h> @@ -14,6 +15,7 @@ #include <fcntl.h> #include <sys/mman.h> #include <sys/mount.h> +#include <sys/param.h> #include <malloc.h> #include <stdbool.h> #include <time.h> @@ -29,6 +31,7 @@ uint64_t pmd_pagesize; #define INPUT_MAX 80 #define PID_FMT "%d,0x%lx,0x%lx,%d" +#define PID_FMT_OFFSET "%d,0x%lx,0x%lx,%d,%d" #define PATH_FMT "%s,0x%lx,0x%lx,%d" #define PFN_MASK ((1UL<<55)-1) @@ -108,38 +111,28 @@ static void verify_rss_anon_split_huge_page_all_zeroes(char *one_page, int nr_hp unsigned long rss_anon_before, rss_anon_after; size_t i; - if (!check_huge_anon(one_page, 4, pmd_pagesize)) { - printf("No THP is allocated\n"); - exit(EXIT_FAILURE); - } + if (!check_huge_anon(one_page, 4, pmd_pagesize)) + ksft_exit_fail_msg("No THP is allocated\n"); rss_anon_before = rss_anon(); - if (!rss_anon_before) { - printf("No RssAnon is allocated before split\n"); - exit(EXIT_FAILURE); - } + if (!rss_anon_before) + ksft_exit_fail_msg("No RssAnon is allocated before split\n"); /* split all THPs */ write_debugfs(PID_FMT, getpid(), (uint64_t)one_page, (uint64_t)one_page + len, 0); for (i = 0; i < len; i++) - if (one_page[i] != (char)0) { - printf("%ld byte corrupted\n", i); - exit(EXIT_FAILURE); - } + if (one_page[i] != (char)0) + ksft_exit_fail_msg("%ld byte corrupted\n", i); - if (!check_huge_anon(one_page, 0, pmd_pagesize)) { - printf("Still AnonHugePages not split\n"); - exit(EXIT_FAILURE); - } + if (!check_huge_anon(one_page, 0, pmd_pagesize)) + ksft_exit_fail_msg("Still AnonHugePages not split\n"); rss_anon_after = rss_anon(); - if (rss_anon_after >= rss_anon_before) { - printf("Incorrect RssAnon 
value. Before: %ld After: %ld\n", + if (rss_anon_after >= rss_anon_before) + ksft_exit_fail_msg("Incorrect RssAnon value. Before: %ld After: %ld\n", rss_anon_before, rss_anon_after); - exit(EXIT_FAILURE); - } } void split_pmd_zero_pages(void) @@ -150,11 +143,11 @@ void split_pmd_zero_pages(void) one_page = allocate_zero_filled_hugepage(len); verify_rss_anon_split_huge_page_all_zeroes(one_page, nr_hpages, len); - printf("Split zero filled huge pages successful\n"); + ksft_test_result_pass("Split zero filled huge pages successful\n"); free(one_page); } -void split_pmd_thp(void) +void split_pmd_thp_to_order(int order) { char *one_page; size_t len = 4 * pmd_pagesize; @@ -174,7 +167,7 @@ void split_pmd_thp(void) /* split all THPs */ write_debugfs(PID_FMT, getpid(), (uint64_t)one_page, - (uint64_t)one_page + len, 0); + (uint64_t)one_page + len, order); for (i = 0; i < len; i++) if (one_page[i] != (char)i) @@ -184,7 +177,7 @@ void split_pmd_thp(void) if (!check_huge_anon(one_page, 0, pmd_pagesize)) ksft_exit_fail_msg("Still AnonHugePages not split\n"); - ksft_test_result_pass("Split huge pages successful\n"); + ksft_test_result_pass("Split huge pages to order %d successful\n", order); free(one_page); } @@ -271,18 +264,32 @@ void split_pte_mapped_thp(void) close(kpageflags_fd); } -void split_file_backed_thp(void) +void split_file_backed_thp(int order) { int status; int fd; - ssize_t num_written; char tmpfs_template[] = "/tmp/thp_split_XXXXXX"; const char *tmpfs_loc = mkdtemp(tmpfs_template); char testfile[INPUT_MAX]; + ssize_t num_written, num_read; + char *file_buf1, *file_buf2; uint64_t pgoff_start = 0, pgoff_end = 1024; + int i; ksft_print_msg("Please enable pr_debug in split_huge_pages_in_file() for more info.\n"); + file_buf1 = (char *)malloc(pmd_pagesize); + file_buf2 = (char *)malloc(pmd_pagesize); + + if (!file_buf1 || !file_buf2) { + ksft_print_msg("cannot allocate file buffers\n"); + goto out; + } + + for (i = 0; i < pmd_pagesize; i++) + file_buf1[i] = (char)i; + memset(file_buf2, 0, pmd_pagesize); + status = mount("tmpfs", tmpfs_loc, "tmpfs", 0, "huge=always,size=4m"); if (status) @@ -291,26 +298,45 @@ void split_file_backed_thp(void) status = snprintf(testfile, INPUT_MAX, "%s/thp_file", tmpfs_loc); if (status >= INPUT_MAX) { ksft_exit_fail_msg("Fail to create file-backed THP split testing file\n"); + goto cleanup; } - fd = open(testfile, O_CREAT|O_WRONLY, 0664); + fd = open(testfile, O_CREAT|O_RDWR, 0664); if (fd == -1) { ksft_perror("Cannot open testing file"); goto cleanup; } - /* write something to the file, so a file-backed THP can be allocated */ - num_written = write(fd, tmpfs_loc, strlen(tmpfs_loc) + 1); - close(fd); + /* write pmd size data to the file, so a file-backed THP can be allocated */ + num_written = write(fd, file_buf1, pmd_pagesize); - if (num_written < 1) { - ksft_perror("Fail to write data to testing file"); - goto cleanup; + if (num_written == -1 || num_written != pmd_pagesize) { + ksft_perror("Failed to write data to testing file"); + goto close_file; } /* split the file-backed THP */ - write_debugfs(PATH_FMT, testfile, pgoff_start, pgoff_end, 0); + write_debugfs(PATH_FMT, testfile, pgoff_start, pgoff_end, order); + + /* check file content after split */ + status = lseek(fd, 0, SEEK_SET); + if (status == -1) { + ksft_perror("Cannot lseek file"); + goto close_file; + } + + num_read = read(fd, file_buf2, num_written); + if (num_read == -1 || num_read != num_written) { + ksft_perror("Cannot read file content back"); + goto close_file; + } + + if (strncmp(file_buf1, 
file_buf2, pmd_pagesize) != 0) { + ksft_print_msg("File content changed\n"); + goto close_file; + } + close(fd); status = unlink(testfile); if (status) { ksft_perror("Cannot remove testing file"); @@ -328,12 +354,15 @@ void split_file_backed_thp(void) ksft_exit_fail_msg("cannot remove tmp dir: %s\n", strerror(errno)); ksft_print_msg("Please check dmesg for more information\n"); - ksft_test_result_pass("File-backed THP split test done\n"); + ksft_test_result_pass("File-backed THP split to order %d test done\n", order); return; +close_file: + close(fd); cleanup: umount(tmpfs_loc); rmdir(tmpfs_loc); +out: ksft_exit_fail_msg("Error occurred\n"); } @@ -371,6 +400,7 @@ int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, { size_t i; int dummy = 0; + unsigned char buf[1024]; srand(time(NULL)); @@ -378,11 +408,12 @@ int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd, if (*fd == -1) ksft_exit_fail_msg("Failed to create a file at %s\n", testfile); - for (i = 0; i < fd_size; i++) { - unsigned char byte = (unsigned char)i; + assert(fd_size % sizeof(buf) == 0); + for (i = 0; i < sizeof(buf); i++) + buf[i] = (unsigned char)i; + for (i = 0; i < fd_size; i += sizeof(buf)) + write(*fd, buf, sizeof(buf)); - write(*fd, &byte, sizeof(byte)); - } close(*fd); sync(); *fd = open("/proc/sys/vm/drop_caches", O_WRONLY); @@ -430,7 +461,8 @@ err_out_unlink: return -1; } -void split_thp_in_pagecache_to_order(size_t fd_size, int order, const char *fs_loc) +void split_thp_in_pagecache_to_order_at(size_t fd_size, const char *fs_loc, + int order, int offset) { int fd; char *addr; @@ -448,7 +480,12 @@ void split_thp_in_pagecache_to_order(size_t fd_size, int order, const char *fs_l return; err = 0; - write_debugfs(PID_FMT, getpid(), (uint64_t)addr, (uint64_t)addr + fd_size, order); + if (offset == -1) + write_debugfs(PID_FMT, getpid(), (uint64_t)addr, + (uint64_t)addr + fd_size, order); + else + write_debugfs(PID_FMT_OFFSET, getpid(), (uint64_t)addr, + (uint64_t)addr + fd_size, order, offset); for (i = 0; i < fd_size; i++) if (*(addr + i) != (char)i) { @@ -467,9 +504,15 @@ out: munmap(addr, fd_size); close(fd); unlink(testfile); - if (err) - ksft_exit_fail_msg("Split PMD-mapped pagecache folio to order %d failed\n", order); - ksft_test_result_pass("Split PMD-mapped pagecache folio to order %d passed\n", order); + if (offset == -1) { + if (err) + ksft_exit_fail_msg("Split PMD-mapped pagecache folio to order %d failed\n", order); + ksft_test_result_pass("Split PMD-mapped pagecache folio to order %d passed\n", order); + } else { + if (err) + ksft_exit_fail_msg("Split PMD-mapped pagecache folio to order %d at in-folio offset %d failed\n", order, offset); + ksft_test_result_pass("Split PMD-mapped pagecache folio to order %d at in-folio offset %d passed\n", order, offset); + } } int main(int argc, char **argv) @@ -480,6 +523,7 @@ int main(int argc, char **argv) char fs_loc_template[] = "/tmp/thp_fs_XXXXXX"; const char *fs_loc; bool created_tmp; + int offset; ksft_print_header(); @@ -491,7 +535,7 @@ int main(int argc, char **argv) if (argc > 1) optional_xfs_path = argv[1]; - ksft_set_plan(3+9); + ksft_set_plan(1+8+1+9+9+8*4+2); pagesize = getpagesize(); pageshift = ffs(pagesize) - 1; @@ -502,14 +546,25 @@ int main(int argc, char **argv) fd_size = 2 * pmd_pagesize; split_pmd_zero_pages(); - split_pmd_thp(); + + for (i = 0; i < 9; i++) + if (i != 1) + split_pmd_thp_to_order(i); + split_pte_mapped_thp(); - split_file_backed_thp(); + for (i = 0; i < 9; i++) + 
split_file_backed_thp(i); created_tmp = prepare_thp_fs(optional_xfs_path, fs_loc_template, &fs_loc); for (i = 8; i >= 0; i--) - split_thp_in_pagecache_to_order(fd_size, i, fs_loc); + split_thp_in_pagecache_to_order_at(fd_size, fs_loc, i, -1); + + for (i = 0; i < 9; i++) + for (offset = 0; + offset < pmd_pagesize / pagesize; + offset += MAX(pmd_pagesize / pagesize / 4, 1 << i)) + split_thp_in_pagecache_to_order_at(fd_size, fs_loc, i, offset); cleanup_thp_fs(fs_loc, created_tmp); ksft_finished(); diff --git a/tools/testing/selftests/mm/thp_settings.c b/tools/testing/selftests/mm/thp_settings.c index 577eaab6266f..ad872af1c81a 100644 --- a/tools/testing/selftests/mm/thp_settings.c +++ b/tools/testing/selftests/mm/thp_settings.c @@ -87,7 +87,7 @@ int write_file(const char *path, const char *buf, size_t buflen) return (unsigned int) numwritten; } -const unsigned long read_num(const char *path) +unsigned long read_num(const char *path) { char buf[21]; @@ -172,7 +172,7 @@ void thp_write_string(const char *name, const char *val) } } -const unsigned long thp_read_num(const char *name) +unsigned long thp_read_num(const char *name) { char path[PATH_MAX]; int ret; diff --git a/tools/testing/selftests/mm/thp_settings.h b/tools/testing/selftests/mm/thp_settings.h index 876235a23460..fc131d23d593 100644 --- a/tools/testing/selftests/mm/thp_settings.h +++ b/tools/testing/selftests/mm/thp_settings.h @@ -64,12 +64,12 @@ struct thp_settings { int read_file(const char *path, char *buf, size_t buflen); int write_file(const char *path, const char *buf, size_t buflen); -const unsigned long read_num(const char *path); +unsigned long read_num(const char *path); void write_num(const char *path, unsigned long num); int thp_read_string(const char *name, const char * const strings[]); void thp_write_string(const char *name, const char *val); -const unsigned long thp_read_num(const char *name); +unsigned long thp_read_num(const char *name); void thp_write_num(const char *name, unsigned long num); void thp_write_settings(struct thp_settings *settings); diff --git a/tools/testing/selftests/mm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c index e4370b79b62f..95b6f043a3cb 100644 --- a/tools/testing/selftests/mm/thuge-gen.c +++ b/tools/testing/selftests/mm/thuge-gen.c @@ -77,7 +77,7 @@ void show(unsigned long ps) system(buf); } -unsigned long read_sysfs(int warn, char *fmt, ...) +unsigned long thuge_read_sysfs(int warn, char *fmt, ...) { char *line = NULL; size_t linelen = 0; @@ -106,7 +106,7 @@ unsigned long read_sysfs(int warn, char *fmt, ...) 
unsigned long read_free(unsigned long ps) { - return read_sysfs(ps != getpagesize(), + return thuge_read_sysfs(ps != getpagesize(), "/sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages", ps >> 10); } @@ -127,7 +127,7 @@ void test_mmap(unsigned long size, unsigned flags) show(size); ksft_test_result(size == getpagesize() || (before - after) == NUM_PAGES, - "%s mmap\n", __func__); + "%s mmap %lu %x\n", __func__, size, flags); if (munmap(map, size * NUM_PAGES)) ksft_exit_fail_msg("%s: unmap %s\n", __func__, strerror(errno)); @@ -165,7 +165,7 @@ void test_shmget(unsigned long size, unsigned flags) show(size); ksft_test_result(size == getpagesize() || (before - after) == NUM_PAGES, - "%s: mmap\n", __func__); + "%s: mmap %lu %x\n", __func__, size, flags); if (shmdt(map)) ksft_exit_fail_msg("%s: shmdt: %s\n", __func__, strerror(errno)); } @@ -195,7 +195,7 @@ void find_pagesizes(void) } globfree(&g); - if (read_sysfs(0, "/proc/sys/kernel/shmmax") < NUM_PAGES * largest) + if (thuge_read_sysfs(0, "/proc/sys/kernel/shmmax") < NUM_PAGES * largest) ksft_exit_fail_msg("Please do echo %lu > /proc/sys/kernel/shmmax", largest * NUM_PAGES); diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c index 7ad6ba660c7d..a37088a23ffe 100644 --- a/tools/testing/selftests/mm/uffd-common.c +++ b/tools/testing/selftests/mm/uffd-common.c @@ -10,7 +10,7 @@ #define BASE_PMD_ADDR ((void *)(1UL << 30)) volatile bool test_uffdio_copy_eexist = true; -unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size; +unsigned long nr_parallel, nr_pages, nr_pages_per_cpu, page_size; char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap; int uffd = -1, uffd_flags, finished, *pipefd, test_type; bool map_shared; @@ -269,7 +269,7 @@ void uffd_test_ctx_clear(void) size_t i; if (pipefd) { - for (i = 0; i < nr_cpus * 2; ++i) { + for (i = 0; i < nr_parallel * 2; ++i) { if (close(pipefd[i])) err("close pipefd"); } @@ -323,7 +323,7 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg) ret = userfaultfd_open(&features); if (ret) { if (errmsg) - *errmsg = "possible lack of priviledge"; + *errmsg = "possible lack of privilege"; return ret; } @@ -348,7 +348,7 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg) /* * After initialization of area_src, we must explicitly release pages * for area_dst to make sure it's fully empty. Otherwise we could have - * some area_dst pages be errornously initialized with zero pages, + * some area_dst pages be erroneously initialized with zero pages, * hence we could hit memory corruption later in the test. 
* * One example is when THP is globally enabled, above allocate_area() @@ -365,10 +365,10 @@ int uffd_test_ctx_init(uint64_t features, const char **errmsg) */ uffd_test_ops->release_pages(area_dst); - pipefd = malloc(sizeof(int) * nr_cpus * 2); + pipefd = malloc(sizeof(int) * nr_parallel * 2); if (!pipefd) err("pipefd"); - for (cpu = 0; cpu < nr_cpus; cpu++) + for (cpu = 0; cpu < nr_parallel; cpu++) if (pipe2(&pipefd[cpu * 2], O_CLOEXEC | O_NONBLOCK)) err("pipe"); diff --git a/tools/testing/selftests/mm/uffd-common.h b/tools/testing/selftests/mm/uffd-common.h index a70ae10b5f62..7700cbfa3975 100644 --- a/tools/testing/selftests/mm/uffd-common.h +++ b/tools/testing/selftests/mm/uffd-common.h @@ -98,7 +98,7 @@ struct uffd_test_case_ops { }; typedef struct uffd_test_case_ops uffd_test_case_ops_t; -extern unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size; +extern unsigned long nr_parallel, nr_pages, nr_pages_per_cpu, page_size; extern char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap; extern int uffd, uffd_flags, finished, *pipefd, test_type; extern bool map_shared; diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c index 944d559ade21..40af7f67c407 100644 --- a/tools/testing/selftests/mm/uffd-stress.c +++ b/tools/testing/selftests/mm/uffd-stress.c @@ -180,12 +180,12 @@ static void *background_thread(void *arg) static int stress(struct uffd_args *args) { unsigned long cpu; - pthread_t locking_threads[nr_cpus]; - pthread_t uffd_threads[nr_cpus]; - pthread_t background_threads[nr_cpus]; + pthread_t locking_threads[nr_parallel]; + pthread_t uffd_threads[nr_parallel]; + pthread_t background_threads[nr_parallel]; finished = 0; - for (cpu = 0; cpu < nr_cpus; cpu++) { + for (cpu = 0; cpu < nr_parallel; cpu++) { if (pthread_create(&locking_threads[cpu], &attr, locking_thread, (void *)cpu)) return 1; @@ -203,7 +203,7 @@ static int stress(struct uffd_args *args) background_thread, (void *)cpu)) return 1; } - for (cpu = 0; cpu < nr_cpus; cpu++) + for (cpu = 0; cpu < nr_parallel; cpu++) if (pthread_join(background_threads[cpu], NULL)) return 1; @@ -219,11 +219,11 @@ static int stress(struct uffd_args *args) uffd_test_ops->release_pages(area_src); finished = 1; - for (cpu = 0; cpu < nr_cpus; cpu++) + for (cpu = 0; cpu < nr_parallel; cpu++) if (pthread_join(locking_threads[cpu], NULL)) return 1; - for (cpu = 0; cpu < nr_cpus; cpu++) { + for (cpu = 0; cpu < nr_parallel; cpu++) { char c; if (bounces & BOUNCE_POLL) { if (write(pipefd[cpu*2+1], &c, 1) != 1) @@ -246,11 +246,11 @@ static int userfaultfd_stress(void) { void *area; unsigned long nr; - struct uffd_args args[nr_cpus]; + struct uffd_args args[nr_parallel]; uint64_t mem_size = nr_pages * page_size; int flags = 0; - memset(args, 0, sizeof(struct uffd_args) * nr_cpus); + memset(args, 0, sizeof(struct uffd_args) * nr_parallel); if (features & UFFD_FEATURE_WP_UNPOPULATED && test_type == TEST_ANON) flags = UFFD_FEATURE_WP_UNPOPULATED; @@ -325,7 +325,7 @@ static int userfaultfd_stress(void) */ uffd_test_ops->release_pages(area_dst); - uffd_stats_reset(args, nr_cpus); + uffd_stats_reset(args, nr_parallel); /* bounce pass */ if (stress(args)) { @@ -359,7 +359,7 @@ static int userfaultfd_stress(void) swap(area_src_alias, area_dst_alias); - uffd_stats_report(args, nr_cpus); + uffd_stats_report(args, nr_parallel); } uffd_test_ctx_clear(); @@ -412,8 +412,8 @@ static void parse_test_type_arg(const char *raw_type) * feature. 
*/ - if (uffd_get_features(&features)) - err("failed to get available features"); + if (uffd_get_features(&features) && errno == ENOENT) + ksft_exit_skip("failed to get available features (%d)\n", errno); test_uffdio_wp = test_uffdio_wp && (features & UFFD_FEATURE_PAGEFAULT_FLAG_WP); @@ -435,6 +435,7 @@ static void sigalrm(int sig) int main(int argc, char **argv) { + unsigned long nr_cpus; size_t bytes; if (argc < 4) @@ -454,10 +455,19 @@ int main(int argc, char **argv) } nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); + if (nr_cpus > 32) { + /* Don't let calculation below go to zero. */ + ksft_print_msg("_SC_NPROCESSORS_ONLN (%lu) too large, capping nr_threads to 32\n", + nr_cpus); + nr_parallel = 32; + } else { + nr_parallel = nr_cpus; + } - nr_pages_per_cpu = bytes / page_size / nr_cpus; + nr_pages_per_cpu = bytes / page_size / nr_parallel; if (!nr_pages_per_cpu) { - _err("invalid MiB"); + _err("pages_per_cpu = 0, cannot test (%lu / %lu / %lu)", + bytes, page_size, nr_parallel); usage(); } @@ -466,7 +476,7 @@ int main(int argc, char **argv) _err("invalid bounces"); usage(); } - nr_pages = nr_pages_per_cpu * nr_cpus; + nr_pages = nr_pages_per_cpu * nr_parallel; printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n", nr_pages, nr_pages_per_cpu); diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index 3ddbb0a71b9c..c73fd5d455c8 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -26,6 +26,8 @@ #define ALIGN_UP(x, align_to) \ ((__typeof__(x))((((unsigned long)(x)) + ((align_to)-1)) & ~((align_to)-1))) +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) + struct mem_type { const char *name; unsigned int mem_flag; @@ -196,9 +198,10 @@ uffd_setup_environment(uffd_test_args_t *args, uffd_test_case_t *test, else page_size = psize(); - nr_pages = UFFD_TEST_MEM_SIZE / page_size; + /* Ensure we have at least 2 pages */ + nr_pages = MAX(UFFD_TEST_MEM_SIZE, page_size * 2) / page_size; /* TODO: remove this global var.. it's so ugly */ - nr_cpus = 1; + nr_parallel = 1; /* Initialize test arguments */ args->mem_type = mem_type; @@ -1123,7 +1126,7 @@ uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size, char c; unsigned long long count; struct uffd_args args = { 0 }; - char *orig_area_src, *orig_area_dst; + char *orig_area_src = NULL, *orig_area_dst = NULL; unsigned long step_size, step_count; unsigned long src_offs = 0; unsigned long dst_offs = 0; @@ -1191,7 +1194,7 @@ uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size, nr, count, count_verify[src_offs + nr + i]); } } - if (step_size > page_size) { + if (chunk_size > page_size) { area_src = orig_area_src; area_dst = orig_area_dst; } @@ -1228,6 +1231,182 @@ static void uffd_move_pmd_split_test(uffd_test_args_t *targs) uffd_move_pmd_handle_fault); } +static bool +uffdio_verify_results(const char *name, int ret, int error, long result) +{ + /* + * Should always return -1 with errno=EAGAIN, with corresponding + * result field updated in ioctl() args to be -EAGAIN too + * (e.g. copy.copy field for UFFDIO_COPY). + */ + if (ret != -1) { + uffd_test_fail("%s should have returned -1", name); + return false; + } + + if (error != EAGAIN) { + uffd_test_fail("%s should have errno==EAGAIN", name); + return false; + } + + if (result != -EAGAIN) { + uffd_test_fail("%s should have been updated for -EAGAIN", + name); + return false; + } + + return true; +} + +/* + * This defines a function to test one ioctl. 
Note that here "field" can + * be 1 or anything not -EAGAIN. With that initial value set, we can + * verify later that it should be updated by kernel (when -EAGAIN + * returned), by checking whether it is also updated to -EAGAIN. + */ +#define DEFINE_MMAP_CHANGING_TEST(name, ioctl_name, field) \ + static bool uffdio_mmap_changing_test_##name(int fd) \ + { \ + int ret; \ + struct uffdio_##name args = { \ + .field = 1, \ + }; \ + ret = ioctl(fd, ioctl_name, &args); \ + return uffdio_verify_results(#ioctl_name, ret, errno, args.field); \ + } + +DEFINE_MMAP_CHANGING_TEST(zeropage, UFFDIO_ZEROPAGE, zeropage) +DEFINE_MMAP_CHANGING_TEST(copy, UFFDIO_COPY, copy) +DEFINE_MMAP_CHANGING_TEST(move, UFFDIO_MOVE, move) +DEFINE_MMAP_CHANGING_TEST(poison, UFFDIO_POISON, updated) +DEFINE_MMAP_CHANGING_TEST(continue, UFFDIO_CONTINUE, mapped) + +typedef enum { + /* We actually do not care about any state except UNINTERRUPTIBLE.. */ + THR_STATE_UNKNOWN = 0, + THR_STATE_UNINTERRUPTIBLE, +} thread_state; + +static void sleep_short(void) +{ + usleep(1000); +} + +static thread_state thread_state_get(pid_t tid) +{ + const char *header = "State:\t"; + char tmp[256], *p, c; + FILE *fp; + + snprintf(tmp, sizeof(tmp), "/proc/%d/status", tid); + fp = fopen(tmp, "r"); + + if (!fp) + return THR_STATE_UNKNOWN; + + while (fgets(tmp, sizeof(tmp), fp)) { + p = strstr(tmp, header); + if (p) { + /* For example, "State:\tD (disk sleep)" */ + c = *(p + sizeof(header) - 1); + return c == 'D' ? + THR_STATE_UNINTERRUPTIBLE : THR_STATE_UNKNOWN; + } + } + + return THR_STATE_UNKNOWN; +} + +static void thread_state_until(pid_t tid, thread_state state) +{ + thread_state s; + + do { + s = thread_state_get(tid); + sleep_short(); + } while (s != state); +} + +static void *uffd_mmap_changing_thread(void *opaque) +{ + volatile pid_t *pid = opaque; + int ret; + + /* Unfortunately, it's only fetch-able from the thread itself.. */ + assert(*pid == 0); + *pid = syscall(SYS_gettid); + + /* Inject an event, this will hang solid until the event read */ + ret = madvise(area_dst, page_size, MADV_REMOVE); + if (ret) + err("madvise(MADV_REMOVE) failed"); + + return NULL; +} + +static void uffd_consume_message(int fd) +{ + struct uffd_msg msg = { 0 }; + + while (uffd_read_msg(fd, &msg)); +} + +static void uffd_mmap_changing_test(uffd_test_args_t *targs) +{ + /* + * This stores the real PID (which can be different from how tid is + * defined..) for the child thread, 0 means not initialized. + */ + pid_t pid = 0; + pthread_t tid; + int ret; + + if (uffd_register(uffd, area_dst, nr_pages * page_size, + true, false, false)) + err("uffd_register() failed"); + + /* Create a thread to generate the racy event */ + ret = pthread_create(&tid, NULL, uffd_mmap_changing_thread, &pid); + if (ret) + err("pthread_create() failed"); + + /* + * Wait until the thread setup the pid. Use volatile to make sure + * it reads from RAM not regs. + */ + while (!(volatile pid_t)pid) + sleep_short(); + + /* Wait until the thread hangs at REMOVE event */ + thread_state_until(pid, THR_STATE_UNINTERRUPTIBLE); + + if (!uffdio_mmap_changing_test_copy(uffd)) + return; + + if (!uffdio_mmap_changing_test_zeropage(uffd)) + return; + + if (!uffdio_mmap_changing_test_move(uffd)) + return; + + if (!uffdio_mmap_changing_test_poison(uffd)) + return; + + if (!uffdio_mmap_changing_test_continue(uffd)) + return; + + /* + * All succeeded above! Recycle everything. Start by reading the + * event so as to kick the thread roll again.. 
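+ * (uffd_consume_message() below reads the pending UFFD_EVENT_REMOVE;
+ * that unblocks the helper thread, which stays stuck in
+ * madvise(MADV_REMOVE) until the event is consumed, so the
+ * pthread_join() that follows can complete.)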
+ */ + uffd_consume_message(uffd); + + ret = pthread_join(tid, NULL); + assert(ret == 0); + + uffd_test_pass(); +} + static int prevent_hugepages(const char **errmsg) { /* This should be done before source area is populated */ @@ -1467,6 +1646,32 @@ uffd_test_case_t uffd_tests[] = { .mem_targets = MEM_ALL, .uffd_feature_required = UFFD_FEATURE_POISON, }, + { + .name = "mmap-changing", + .uffd_fn = uffd_mmap_changing_test, + /* + * There's no point running this test over all mem types as + * they share the same code paths. + * + * Choose shmem for simplicity, because (1) shmem supports + * MINOR mode to cover UFFDIO_CONTINUE, and (2) shmem is + * almost always available (unlike hugetlb). Here we + * abused SHMEM for UFFDIO_MOVE, but the test we want to + * cover doesn't yet need the correct memory type.. + */ + .mem_targets = MEM_SHMEM, + /* + * Any UFFD_FEATURE_EVENT_* should work to trigger the + * race logically, but choose the simplest (REMOVE). + * + * Meanwhile, since we'll cover quite a few new ioctl()s + * (CONTINUE, POISON, MOVE), skip this test for old kernels + * by choosing all of them. + */ + .uffd_feature_required = UFFD_FEATURE_EVENT_REMOVE | + UFFD_FEATURE_MOVE | UFFD_FEATURE_POISON | + UFFD_FEATURE_MINOR_SHMEM, + }, }; static void usage(const char *prog) diff --git a/tools/testing/selftests/mm/uffd-wp-mremap.c b/tools/testing/selftests/mm/uffd-wp-mremap.c new file mode 100644 index 000000000000..c2ba7d46c7b4 --- /dev/null +++ b/tools/testing/selftests/mm/uffd-wp-mremap.c @@ -0,0 +1,383 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#define _GNU_SOURCE +#include <stdbool.h> +#include <stdint.h> +#include <fcntl.h> +#include <assert.h> +#include <linux/mman.h> +#include <sys/mman.h> +#include "../kselftest.h" +#include "thp_settings.h" +#include "uffd-common.h" + +static int pagemap_fd; +static size_t pagesize; +static int nr_pagesizes = 1; +static int nr_thpsizes; +static size_t thpsizes[20]; +static int nr_hugetlbsizes; +static size_t hugetlbsizes[10]; + +static int sz2ord(size_t size) +{ + return __builtin_ctzll(size / pagesize); +} + +static int detect_thp_sizes(size_t sizes[], int max) +{ + int count = 0; + unsigned long orders; + size_t kb; + int i; + + /* thp not supported at all. */ + if (!read_pmd_pagesize()) + return 0; + + orders = thp_supported_orders(); + + for (i = 0; orders && count < max; i++) { + if (!(orders & (1UL << i))) + continue; + orders &= ~(1UL << i); + kb = (pagesize >> 10) << i; + sizes[count++] = kb * 1024; + ksft_print_msg("[INFO] detected THP size: %zu KiB\n", kb); + } + + return count; +} + +static void *mmap_aligned(size_t size, int prot, int flags) +{ + size_t mmap_size = size * 2; + char *mmap_mem, *mem; + + mmap_mem = mmap(NULL, mmap_size, prot, flags, -1, 0); + if (mmap_mem == MAP_FAILED) + return mmap_mem; + + mem = (char *)(((uintptr_t)mmap_mem + size - 1) & ~(size - 1)); + munmap(mmap_mem, mem - mmap_mem); + munmap(mem + size, mmap_mem + mmap_size - mem - size); + + return mem; +} + +static void *alloc_one_folio(size_t size, bool private, bool hugetlb) +{ + bool thp = !hugetlb && size > pagesize; + int flags = MAP_ANONYMOUS; + int prot = PROT_READ | PROT_WRITE; + char *mem, *addr; + + assert((size & (size - 1)) == 0); + + if (private) + flags |= MAP_PRIVATE; + else + flags |= MAP_SHARED; + + /* + * For THP, we must explicitly enable the THP size, allocate twice the + * required space then manually align. 
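+ * (mmap_aligned() implements this: it maps 2 * size bytes, rounds the
+ * start up to the requested alignment, then munmap()s the unaligned
+ * head and tail so the returned region is naturally aligned to its
+ * size.)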
+ */ + if (thp) { + struct thp_settings settings = *thp_current_settings(); + + if (private) + settings.hugepages[sz2ord(size)].enabled = THP_ALWAYS; + else + settings.shmem_hugepages[sz2ord(size)].enabled = SHMEM_ALWAYS; + + thp_push_settings(&settings); + + mem = mmap_aligned(size, prot, flags); + } else { + if (hugetlb) { + flags |= MAP_HUGETLB; + flags |= __builtin_ctzll(size) << MAP_HUGE_SHIFT; + } + + mem = mmap(NULL, size, prot, flags, -1, 0); + } + + if (mem == MAP_FAILED) { + mem = NULL; + goto out; + } + + assert(((uintptr_t)mem & (size - 1)) == 0); + + /* + * Populate the folio by writing the first byte and check that all pages + * are populated. Finally set the whole thing to non-zero data to avoid + * kernel from mapping it back to the zero page. + */ + mem[0] = 1; + for (addr = mem; addr < mem + size; addr += pagesize) { + if (!pagemap_is_populated(pagemap_fd, addr)) { + munmap(mem, size); + mem = NULL; + goto out; + } + } + memset(mem, 1, size); +out: + if (thp) + thp_pop_settings(); + + return mem; +} + +static bool check_uffd_wp_state(void *mem, size_t size, bool expect) +{ + uint64_t pte; + void *addr; + + for (addr = mem; addr < mem + size; addr += pagesize) { + pte = pagemap_get_entry(pagemap_fd, addr); + if (!!(pte & PM_UFFD_WP) != expect) { + ksft_test_result_fail("uffd-wp not %s for pte %lu!\n", + expect ? "set" : "clear", + (addr - mem) / pagesize); + return false; + } + } + + return true; +} + +static bool range_is_swapped(void *addr, size_t size) +{ + for (; size; addr += pagesize, size -= pagesize) + if (!pagemap_is_swapped(pagemap_fd, addr)) + return false; + return true; +} + +static void test_one_folio(size_t size, bool private, bool swapout, bool hugetlb) +{ + struct uffdio_writeprotect wp_prms; + uint64_t features = 0; + void *addr = NULL; + void *mem = NULL; + + assert(!(hugetlb && swapout)); + + ksft_print_msg("[RUN] %s(size=%zu, private=%s, swapout=%s, hugetlb=%s)\n", + __func__, + size, + private ? "true" : "false", + swapout ? "true" : "false", + hugetlb ? "true" : "false"); + + /* Allocate a folio of required size and type. */ + mem = alloc_one_folio(size, private, hugetlb); + if (!mem) { + ksft_test_result_fail("alloc_one_folio() failed\n"); + goto out; + } + + /* Register range for uffd-wp. */ + if (userfaultfd_open(&features)) { + if (errno == ENOENT) + ksft_test_result_skip("userfaultfd not available\n"); + else + ksft_test_result_fail("userfaultfd_open() failed\n"); + goto out; + } + if (uffd_register(uffd, mem, size, false, true, false)) { + ksft_test_result_fail("uffd_register() failed\n"); + goto out; + } + wp_prms.mode = UFFDIO_WRITEPROTECT_MODE_WP; + wp_prms.range.start = (uintptr_t)mem; + wp_prms.range.len = size; + if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp_prms)) { + ksft_test_result_fail("ioctl(UFFDIO_WRITEPROTECT) failed\n"); + goto out; + } + + if (swapout) { + madvise(mem, size, MADV_PAGEOUT); + if (!range_is_swapped(mem, size)) { + ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n"); + goto out; + } + } + + /* Check that uffd-wp is set for all PTEs in range. */ + if (!check_uffd_wp_state(mem, size, true)) + goto out; + + /* + * Move the mapping to a new, aligned location. Since + * UFFD_FEATURE_EVENT_REMAP is not set, we expect the uffd-wp bit for + * each PTE to be cleared in the new mapping. 
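+ * (If UFFD_FEATURE_EVENT_REMAP were enabled, the registration, and with
+ * it the uffd-wp state, would be expected to follow the mapping to its
+ * new address instead.)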
+ */ + addr = mmap_aligned(size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS); + if (addr == MAP_FAILED) { + ksft_test_result_fail("mmap_aligned() failed\n"); + goto out; + } + if (mremap(mem, size, size, MREMAP_FIXED | MREMAP_MAYMOVE, addr) == MAP_FAILED) { + ksft_test_result_fail("mremap() failed\n"); + munmap(addr, size); + goto out; + } + mem = addr; + + /* Check that uffd-wp is cleared for all PTEs in range. */ + if (!check_uffd_wp_state(mem, size, false)) + goto out; + + ksft_test_result_pass("%s(size=%zu, private=%s, swapout=%s, hugetlb=%s)\n", + __func__, + size, + private ? "true" : "false", + swapout ? "true" : "false", + hugetlb ? "true" : "false"); +out: + if (mem) + munmap(mem, size); + if (uffd >= 0) { + close(uffd); + uffd = -1; + } +} + +struct testcase { + size_t *sizes; + int *nr_sizes; + bool private; + bool swapout; + bool hugetlb; +}; + +static const struct testcase testcases[] = { + /* base pages. */ + { + .sizes = &pagesize, + .nr_sizes = &nr_pagesizes, + .private = false, + .swapout = false, + .hugetlb = false, + }, + { + .sizes = &pagesize, + .nr_sizes = &nr_pagesizes, + .private = true, + .swapout = false, + .hugetlb = false, + }, + { + .sizes = &pagesize, + .nr_sizes = &nr_pagesizes, + .private = false, + .swapout = true, + .hugetlb = false, + }, + { + .sizes = &pagesize, + .nr_sizes = &nr_pagesizes, + .private = true, + .swapout = true, + .hugetlb = false, + }, + + /* thp. */ + { + .sizes = thpsizes, + .nr_sizes = &nr_thpsizes, + .private = false, + .swapout = false, + .hugetlb = false, + }, + { + .sizes = thpsizes, + .nr_sizes = &nr_thpsizes, + .private = true, + .swapout = false, + .hugetlb = false, + }, + { + .sizes = thpsizes, + .nr_sizes = &nr_thpsizes, + .private = false, + .swapout = true, + .hugetlb = false, + }, + { + .sizes = thpsizes, + .nr_sizes = &nr_thpsizes, + .private = true, + .swapout = true, + .hugetlb = false, + }, + + /* hugetlb. */ + { + .sizes = hugetlbsizes, + .nr_sizes = &nr_hugetlbsizes, + .private = false, + .swapout = false, + .hugetlb = true, + }, + { + .sizes = hugetlbsizes, + .nr_sizes = &nr_hugetlbsizes, + .private = true, + .swapout = false, + .hugetlb = true, + }, +}; + +int main(int argc, char **argv) +{ + struct thp_settings settings; + int i, j, plan = 0; + + pagesize = getpagesize(); + nr_thpsizes = detect_thp_sizes(thpsizes, ARRAY_SIZE(thpsizes)); + nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes, + ARRAY_SIZE(hugetlbsizes)); + + /* If THP is supported, save THP settings and initially disable THP. */ + if (nr_thpsizes) { + thp_save_settings(); + thp_read_settings(&settings); + for (i = 0; i < NR_ORDERS; i++) { + settings.hugepages[i].enabled = THP_NEVER; + settings.shmem_hugepages[i].enabled = SHMEM_NEVER; + } + thp_push_settings(&settings); + } + + for (i = 0; i < ARRAY_SIZE(testcases); i++) + plan += *testcases[i].nr_sizes; + ksft_set_plan(plan); + + pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + if (pagemap_fd < 0) + ksft_exit_fail_msg("opening pagemap failed\n"); + + for (i = 0; i < ARRAY_SIZE(testcases); i++) { + const struct testcase *tc = &testcases[i]; + + for (j = 0; j < *tc->nr_sizes; j++) + test_one_folio(tc->sizes[j], tc->private, tc->swapout, + tc->hugetlb); + } + + /* If THP is supported, restore original THP settings. 
*/ + if (nr_thpsizes) + thp_restore_settings(); + + i = ksft_get_fail_cnt(); + if (i) + ksft_exit_fail_msg("%d out of %d tests failed\n", + i, ksft_test_num()); + ksft_exit_pass(); +} diff --git a/tools/testing/selftests/mm/va_high_addr_switch.sh b/tools/testing/selftests/mm/va_high_addr_switch.sh index 2c725773cd79..325de53966b6 100755 --- a/tools/testing/selftests/mm/va_high_addr_switch.sh +++ b/tools/testing/selftests/mm/va_high_addr_switch.sh @@ -7,23 +7,20 @@ # real test to check that the kernel is configured to support at least 5 # pagetable levels. -# 1 means the test failed -exitcode=1 - # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 -fail() +skip() { echo "$1" - exit $exitcode + exit $ksft_skip } check_supported_x86_64() { local config="/proc/config.gz" [[ -f "${config}" ]] || config="/boot/config-$(uname -r)" - [[ -f "${config}" ]] || fail "Cannot find kernel config in /proc or /boot" + [[ -f "${config}" ]] || skip "Cannot find kernel config in /proc or /boot" # gzip -dcfq automatically handles both compressed and plaintext input. # See man 1 gzip under '-f'. @@ -33,11 +30,31 @@ check_supported_x86_64() else {print 1}; exit}' /proc/cpuinfo 2>/dev/null) if [[ "${pg_table_levels}" -lt 5 ]]; then - echo "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test" - exit $ksft_skip + skip "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test" elif [[ "${cpu_supports_pl5}" -ne 0 ]]; then - echo "$0: CPU does not have the necessary la57 flag to support page table level 5" - exit $ksft_skip + skip "$0: CPU does not have the necessary la57 flag to support page table level 5" + fi +} + +check_supported_ppc64() +{ + local config="/proc/config.gz" + [[ -f "${config}" ]] || config="/boot/config-$(uname -r)" + [[ -f "${config}" ]] || skip "Cannot find kernel config in /proc or /boot" + + local pg_table_levels=$(gzip -dcfq "${config}" | grep PGTABLE_LEVELS | cut -d'=' -f 2) + if [[ "${pg_table_levels}" -lt 5 ]]; then + skip "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test" + fi + + local mmu_support=$(grep -m1 "mmu" /proc/cpuinfo | awk '{print $3}') + if [[ "$mmu_support" != "radix" ]]; then + skip "$0: System does not use Radix MMU, required for 5-level paging" + fi + + local hugepages_total=$(awk '/HugePages_Total/ {print $2}' /proc/meminfo) + if [[ "${hugepages_total}" -eq 0 ]]; then + skip "$0: HugePages are not enabled, required for some tests" fi } @@ -50,6 +67,9 @@ check_test_requirements() "x86_64") check_supported_x86_64 ;; + "ppc64le"|"ppc64") + check_supported_ppc64 + ;; *) return 0 ;; diff --git a/tools/testing/selftests/mm/virtual_address_range.c b/tools/testing/selftests/mm/virtual_address_range.c index 2a2b69e91950..169dbd692bf5 100644 --- a/tools/testing/selftests/mm/virtual_address_range.c +++ b/tools/testing/selftests/mm/virtual_address_range.c @@ -10,10 +10,12 @@ #include <string.h> #include <unistd.h> #include <errno.h> +#include <sys/prctl.h> #include <sys/mman.h> #include <sys/time.h> #include <fcntl.h> +#include "vm_util.h" #include "../kselftest.h" /* @@ -75,13 +77,34 @@ static void validate_addr(char *ptr, int high_addr) { unsigned long addr = (unsigned long) ptr; - if (high_addr && addr < HIGH_ADDR_MARK) - ksft_exit_fail_msg("Bad address %lx\n", addr); + if (high_addr) { + if (addr < HIGH_ADDR_MARK) + ksft_exit_fail_msg("Bad address %lx\n", addr); + return; + } if (addr > HIGH_ADDR_MARK) ksft_exit_fail_msg("Bad address %lx\n", addr); } +static void mark_range(char *ptr, size_t size) +{ + if 
(prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, ptr, size, "virtual_address_range") == -1) { + if (errno == EINVAL) { + /* Depends on CONFIG_ANON_VMA_NAME */ + ksft_test_result_skip("prctl(PR_SET_VMA_ANON_NAME) not supported\n"); + ksft_finished(); + } else { + ksft_exit_fail_perror("prctl(PR_SET_VMA_ANON_NAME) failed\n"); + } + } +} + +static int is_marked_vma(const char *vma_name) +{ + return vma_name && !strcmp(vma_name, "[anon:virtual_address_range]\n"); +} + static int validate_lower_address_hint(void) { char *ptr; @@ -116,12 +139,17 @@ static int validate_complete_va_space(void) prev_end_addr = 0; while (fgets(line, sizeof(line), file)) { + const char *vma_name = NULL; + int vma_name_start = 0; unsigned long hop; - if (sscanf(line, "%lx-%lx %s[rwxp-]", - &start_addr, &end_addr, prot) != 3) + if (sscanf(line, "%lx-%lx %4s %*s %*s %*s %n", + &start_addr, &end_addr, prot, &vma_name_start) != 3) ksft_exit_fail_msg("cannot parse /proc/self/maps\n"); + if (vma_name_start) + vma_name = line + vma_name_start; + /* end of userspace mappings; ignore vsyscall mapping */ if (start_addr & (1UL << 63)) return 0; @@ -135,6 +163,9 @@ static int validate_complete_va_space(void) if (prot[0] != 'r') continue; + if (check_vmflag_io((void *)start_addr)) + continue; + /* * Confirm whether MAP_CHUNK_SIZE chunk can be found or not. * If write succeeds, no need to check MAP_CHUNK_SIZE - 1 @@ -149,6 +180,9 @@ static int validate_complete_va_space(void) return 1; lseek(fd, 0, SEEK_SET); + if (is_marked_vma(vma_name)) + munmap((char *)(start_addr + hop), MAP_CHUNK_SIZE); + hop += MAP_CHUNK_SIZE; } } @@ -166,7 +200,7 @@ int main(int argc, char *argv[]) ksft_set_plan(1); for (i = 0; i < NR_CHUNKS_LOW; i++) { - ptr[i] = mmap(NULL, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE, + ptr[i] = mmap(NULL, MAP_CHUNK_SIZE, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (ptr[i] == MAP_FAILED) { @@ -175,6 +209,7 @@ int main(int argc, char *argv[]) break; } + mark_range(ptr[i], MAP_CHUNK_SIZE); validate_addr(ptr[i], 0); } lchunks = i; @@ -186,12 +221,13 @@ int main(int argc, char *argv[]) for (i = 0; i < NR_CHUNKS_HIGH; i++) { hint = hint_addr(); - hptr[i] = mmap(hint, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE, + hptr[i] = mmap(hint, MAP_CHUNK_SIZE, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (hptr[i] == MAP_FAILED) break; + mark_range(ptr[i], MAP_CHUNK_SIZE); validate_addr(hptr[i], 1); } hchunks = i; diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c index d8d0cf04bb57..5492e3f784df 100644 --- a/tools/testing/selftests/mm/vm_util.c +++ b/tools/testing/selftests/mm/vm_util.c @@ -1,7 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 #include <string.h> +#include <errno.h> #include <fcntl.h> #include <dirent.h> +#include <inttypes.h> #include <sys/ioctl.h> #include <linux/userfaultfd.h> #include <linux/fs.h> @@ -138,7 +140,7 @@ void clear_softdirty(void) ksft_exit_fail_msg("opening clear_refs failed\n"); ret = write(fd, ctrl, strlen(ctrl)); close(fd); - if (ret != strlen(ctrl)) + if (ret != (signed int)strlen(ctrl)) ksft_exit_fail_msg("writing clear_refs failed\n"); } @@ -193,13 +195,11 @@ err_out: return rss_anon; } -bool __check_huge(void *addr, char *pattern, int nr_hpages, - uint64_t hpage_size) +char *__get_smap_entry(void *addr, const char *pattern, char *buf, size_t len) { - uint64_t thp = -1; int ret; FILE *fp; - char buffer[MAX_LINE_LENGTH]; + char *entry = NULL; char addr_pattern[MAX_LINE_LENGTH]; ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-", @@ -211,23 +211,40 @@ bool 
__check_huge(void *addr, char *pattern, int nr_hpages, if (!fp) ksft_exit_fail_msg("%s: Failed to open file %s\n", __func__, SMAP_FILE_PATH); - if (!check_for_pattern(fp, addr_pattern, buffer, sizeof(buffer))) + if (!check_for_pattern(fp, addr_pattern, buf, len)) goto err_out; - /* - * Fetch the pattern in the same block and check the number of - * hugepages. - */ - if (!check_for_pattern(fp, pattern, buffer, sizeof(buffer))) + /* Fetch the pattern in the same block */ + if (!check_for_pattern(fp, pattern, buf, len)) goto err_out; - snprintf(addr_pattern, MAX_LINE_LENGTH, "%s%%9ld kB", pattern); + /* Trim trailing newline */ + entry = strchr(buf, '\n'); + if (entry) + *entry = '\0'; - if (sscanf(buffer, addr_pattern, &thp) != 1) - ksft_exit_fail_msg("Reading smap error\n"); + entry = buf + strlen(pattern); err_out: fclose(fp); + return entry; +} + +bool __check_huge(void *addr, char *pattern, int nr_hpages, + uint64_t hpage_size) +{ + char buffer[MAX_LINE_LENGTH]; + uint64_t thp = -1; + char *entry; + + entry = __get_smap_entry(addr, pattern, buffer, sizeof(buffer)); + if (!entry) + goto err_out; + + if (sscanf(entry, "%9" SCNu64 " kB", &thp) != 1) + ksft_exit_fail_msg("Reading smap error\n"); + +err_out: return thp == (nr_hpages * (hpage_size >> 10)); } @@ -384,3 +401,126 @@ unsigned long get_free_hugepages(void) fclose(f); return fhp; } + +bool check_vmflag_io(void *addr) +{ + char buffer[MAX_LINE_LENGTH]; + const char *flags; + size_t flaglen; + + flags = __get_smap_entry(addr, "VmFlags:", buffer, sizeof(buffer)); + if (!flags) + ksft_exit_fail_msg("%s: No VmFlags for %p\n", __func__, addr); + + while (true) { + flags += strspn(flags, " "); + + flaglen = strcspn(flags, " "); + if (!flaglen) + return false; + + if (flaglen == strlen("io") && !memcmp(flags, "io", flaglen)) + return true; + + flags += flaglen; + } +} + +/* + * Open an fd at /proc/$pid/maps and configure procmap_out ready for + * PROCMAP_QUERY query. Returns 0 on success, or an error code otherwise. + */ +int open_procmap(pid_t pid, struct procmap_fd *procmap_out) +{ + char path[256]; + int ret = 0; + + memset(procmap_out, '\0', sizeof(*procmap_out)); + sprintf(path, "/proc/%d/maps", pid); + procmap_out->query.size = sizeof(procmap_out->query); + procmap_out->fd = open(path, O_RDONLY); + if (procmap_out->fd < 0) + ret = -errno; + + return ret; +} + +/* Perform PROCMAP_QUERY. Returns 0 on success, or an error code otherwise. */ +int query_procmap(struct procmap_fd *procmap) +{ + int ret = 0; + + if (ioctl(procmap->fd, PROCMAP_QUERY, &procmap->query) == -1) + ret = -errno; + + return ret; +} + +/* + * Try to find the VMA at specified address, returns true if found, false if not + * found, and the test is failed if any other error occurs. + * + * On success, procmap->query is populated with the results. + */ +bool find_vma_procmap(struct procmap_fd *procmap, void *address) +{ + int err; + + procmap->query.query_flags = 0; + procmap->query.query_addr = (unsigned long)address; + err = query_procmap(procmap); + if (!err) + return true; + + if (err != -ENOENT) + ksft_exit_fail_msg("%s: Error %d on ioctl(PROCMAP_QUERY)\n", + __func__, err); + return false; +} + +/* + * Close fd used by PROCMAP_QUERY mechanism. Returns 0 on success, or an error + * code otherwise. 
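+ * (The query state lives entirely in the caller-owned struct procmap_fd,
+ * so closing the fd is the only teardown required.)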
+ */ +int close_procmap(struct procmap_fd *procmap) +{ + return close(procmap->fd); +} + +int write_sysfs(const char *file_path, unsigned long val) +{ + FILE *f = fopen(file_path, "w"); + + if (!f) { + fprintf(stderr, "f %s\n", file_path); + perror("fopen"); + return 1; + } + if (fprintf(f, "%lu", val) < 0) { + perror("fprintf"); + fclose(f); + return 1; + } + fclose(f); + + return 0; +} + +int read_sysfs(const char *file_path, unsigned long *val) +{ + FILE *f = fopen(file_path, "r"); + + if (!f) { + fprintf(stderr, "f %s\n", file_path); + perror("fopen"); + return 1; + } + if (fscanf(f, "%lu", val) != 1) { + perror("fscanf"); + fclose(f); + return 1; + } + fclose(f); + + return 0; +} diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index 2eaed8209925..b8136d12a0f8 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ b/tools/testing/selftests/mm/vm_util.h @@ -3,13 +3,17 @@ #include <stdbool.h> #include <sys/mman.h> #include <err.h> +#include <stdarg.h> #include <strings.h> /* ffsl() */ #include <unistd.h> /* _SC_PAGESIZE */ +#include "../kselftest.h" +#include <linux/fs.h> #define BIT_ULL(nr) (1ULL << (nr)) #define PM_SOFT_DIRTY BIT_ULL(55) #define PM_MMAP_EXCLUSIVE BIT_ULL(56) #define PM_UFFD_WP BIT_ULL(57) +#define PM_GUARD_REGION BIT_ULL(58) #define PM_FILE BIT_ULL(61) #define PM_SWAP BIT_ULL(62) #define PM_PRESENT BIT_ULL(63) @@ -17,6 +21,15 @@ extern unsigned int __page_size; extern unsigned int __page_shift; +/* + * Represents an open fd and PROCMAP_QUERY state for binary (via ioctl) + * /proc/$pid/[s]maps lookup. + */ +struct procmap_fd { + int fd; + struct procmap_query query; +}; + static inline unsigned int psize(void) { if (!__page_size) @@ -31,6 +44,23 @@ static inline unsigned int pshift(void) return __page_shift; } +/* + * Plan 9 FS has bugs (at least on QEMU) where certain operations fail with + * ENOENT on unlinked files. See + * https://gitlab.com/qemu-project/qemu/-/issues/103 for some info about such + * bugs. There are rumours of NFS implementations with similar bugs. + * + * Ideally, tests should just detect filesystems known to have such issues and + * bail early. But 9pfs has the additional "feature" that it causes fstatfs to + * pass through the f_type field from the host filesystem. To avoid having to + * scrape /proc/mounts or some other hackery, tests can call this function when + * it seems such a bug might have been encountered. + */ +static inline void skip_test_dodgy_fs(const char *op_name) +{ + ksft_test_result_skip("%s failed with ENOENT. 
Filesystem might be buggy (9pfs?)\n", op_name); +} + uint64_t pagemap_get_entry(int fd, char *start); bool pagemap_is_softdirty(int fd, char *start); bool pagemap_is_swapped(int fd, char *start); @@ -53,6 +83,39 @@ int uffd_unregister(int uffd, void *addr, uint64_t len); int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len, bool miss, bool wp, bool minor, uint64_t *ioctls); unsigned long get_free_hugepages(void); +bool check_vmflag_io(void *addr); +int open_procmap(pid_t pid, struct procmap_fd *procmap_out); +int query_procmap(struct procmap_fd *procmap); +bool find_vma_procmap(struct procmap_fd *procmap, void *address); +int close_procmap(struct procmap_fd *procmap); +int write_sysfs(const char *file_path, unsigned long val); +int read_sysfs(const char *file_path, unsigned long *val); + +static inline int open_self_procmap(struct procmap_fd *procmap_out) +{ + pid_t pid = getpid(); + + return open_procmap(pid, procmap_out); +} + +/* These helpers need to be inline to match the kselftest.h idiom. */ +static char test_name[1024]; + +static inline void log_test_start(const char *name, ...) +{ + va_list args; + va_start(args, name); + + vsnprintf(test_name, sizeof(test_name), name, args); + ksft_print_msg("[RUN] %s\n", test_name); + + va_end(args); +} + +static inline void log_test_result(int result) +{ + ksft_test_result_report(result, "%s\n", test_name); +} /* * On ppc64 this will only work with radix 2M hugepage size diff --git a/tools/testing/selftests/mm/write_to_hugetlbfs.c b/tools/testing/selftests/mm/write_to_hugetlbfs.c index 1289d311efd7..34c91f7e6128 100644 --- a/tools/testing/selftests/mm/write_to_hugetlbfs.c +++ b/tools/testing/selftests/mm/write_to_hugetlbfs.c @@ -89,7 +89,7 @@ int main(int argc, char **argv) size = atoi(optarg); break; case 'p': - strncpy(path, optarg, sizeof(path)); + strncpy(path, optarg, sizeof(path) - 1); break; case 'm': if (atoi(optarg) >= MAX_METHOD) { |
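A usage note on the new PROCMAP_QUERY helpers added to vm_util above: the sketch below is a hypothetical, minimal caller, not part of the patch. It assumes the struct procmap_fd and helper declarations from vm_util.h shown in this diff, and that struct procmap_query (from linux/fs.h) carries vma_start/vma_end fields populated by the ioctl.

#include <stdio.h>
#include <sys/mman.h>
#include "vm_util.h"

int main(void)
{
	struct procmap_fd procmap;
	char *mem;

	/* Map one page so there is a known VMA to look up. */
	mem = mmap(NULL, psize(), PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mem == MAP_FAILED)
		return 1;

	/* open_self_procmap() opens /proc/self/maps and sizes the query. */
	if (open_self_procmap(&procmap))
		return 1;

	/*
	 * find_vma_procmap() issues ioctl(PROCMAP_QUERY) for the address and
	 * populates procmap.query on success -- no text parsing of
	 * /proc/self/maps is needed.
	 */
	if (find_vma_procmap(&procmap, mem))
		printf("vma: 0x%llx-0x%llx\n",
		       (unsigned long long)procmap.query.vma_start,
		       (unsigned long long)procmap.query.vma_end);

	close_procmap(&procmap);
	munmap(mem, psize());
	return 0;
}

Note that find_vma_procmap() exits the test on any ioctl error other than -ENOENT, so a caller only has to distinguish found from not-found.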