diff options
Diffstat (limited to 'tools/testing/selftests/mm')
23 files changed, 1100 insertions, 137 deletions
diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index cfad627e8d94..02e1204971b0 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -90,6 +90,7 @@ CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_64bit_pr CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_program.c -no-pie) VMTARGETS := protection_keys +VMTARGETS += pkey_sighandler_tests BINARIES_32 := $(VMTARGETS:%=%_32) BINARIES_64 := $(VMTARGETS:%=%_64) @@ -106,13 +107,13 @@ TEST_GEN_FILES += $(BINARIES_64) endif else -ifneq (,$(findstring $(ARCH),powerpc)) +ifneq (,$(filter $(ARCH),arm64 powerpc)) TEST_GEN_FILES += protection_keys endif endif -ifneq (,$(filter $(ARCH),arm64 ia64 mips64 parisc64 powerpc riscv64 s390x sparc64 x86_64 s390)) +ifneq (,$(filter $(ARCH),arm64 mips64 parisc64 powerpc riscv64 s390x sparc64 x86_64 s390)) TEST_GEN_FILES += va_high_addr_switch TEST_GEN_FILES += virtual_address_range TEST_GEN_FILES += write_to_hugetlbfs diff --git a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh index d680c00d2853..67df7b47087f 100755 --- a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh +++ b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh @@ -254,7 +254,7 @@ function cleanup_hugetlb_memory() { local cgroup="$1" if [[ "$(pgrep -f write_to_hugetlbfs)" != "" ]]; then echo killing write_to_hugetlbfs - killall -2 write_to_hugetlbfs + killall -2 --wait write_to_hugetlbfs wait_for_hugetlb_memory_to_get_depleted $cgroup fi set -e diff --git a/tools/testing/selftests/mm/hugepage-mmap.c b/tools/testing/selftests/mm/hugepage-mmap.c index 267eea2e0e0b..3b1b532f1cbb 100644 --- a/tools/testing/selftests/mm/hugepage-mmap.c +++ b/tools/testing/selftests/mm/hugepage-mmap.c @@ -8,13 +8,6 @@ * like /mnt) using the command mount -t hugetlbfs nodev /mnt. In this * example, the app is requesting memory of size 256MB that is backed by * huge pages. - * - * For the ia64 architecture, the Linux kernel reserves Region number 4 for - * huge pages. That means that if one requires a fixed address, a huge page - * aligned address starting with 0x800000... will be required. If a fixed - * address is not required, the kernel will select an address in the proper - * range. - * Other architectures, such as ppc64, i386 or x86_64 are not so constrained. */ #define _GNU_SOURCE #include <stdlib.h> @@ -27,15 +20,6 @@ #define LENGTH (256UL*1024*1024) #define PROTECTION (PROT_READ | PROT_WRITE) -/* Only ia64 requires this */ -#ifdef __ia64__ -#define ADDR (void *)(0x8000000000000000UL) -#define FLAGS (MAP_SHARED | MAP_FIXED) -#else -#define ADDR (void *)(0x0UL) -#define FLAGS (MAP_SHARED) -#endif - static void check_bytes(char *addr) { ksft_print_msg("First hex is %x\n", *((unsigned int *)addr)); @@ -74,7 +58,7 @@ int main(void) if (fd < 0) ksft_exit_fail_msg("memfd_create() failed: %s\n", strerror(errno)); - addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, fd, 0); + addr = mmap(NULL, LENGTH, PROTECTION, MAP_SHARED, fd, 0); if (addr == MAP_FAILED) { close(fd); ksft_exit_fail_msg("mmap(): %s\n", strerror(errno)); diff --git a/tools/testing/selftests/mm/hugepage-shm.c b/tools/testing/selftests/mm/hugepage-shm.c index 478bb1e989e9..ef06260802b5 100644 --- a/tools/testing/selftests/mm/hugepage-shm.c +++ b/tools/testing/selftests/mm/hugepage-shm.c @@ -8,13 +8,6 @@ * SHM_HUGETLB in the shmget system call to inform the kernel that it is * requesting huge pages. * - * For the ia64 architecture, the Linux kernel reserves Region number 4 for - * huge pages. That means that if one requires a fixed address, a huge page - * aligned address starting with 0x800000... will be required. If a fixed - * address is not required, the kernel will select an address in the proper - * range. - * Other architectures, such as ppc64, i386 or x86_64 are not so constrained. - * * Note: The default shared memory limit is quite low on many kernels, * you may need to increase it via: * @@ -39,15 +32,6 @@ #define dprintf(x) printf(x) -/* Only ia64 requires this */ -#ifdef __ia64__ -#define ADDR (void *)(0x8000000000000000UL) -#define SHMAT_FLAGS (SHM_RND) -#else -#define ADDR (void *)(0x0UL) -#define SHMAT_FLAGS (0) -#endif - int main(void) { int shmid; @@ -61,7 +45,7 @@ int main(void) } printf("shmid: 0x%x\n", shmid); - shmaddr = shmat(shmid, ADDR, SHMAT_FLAGS); + shmaddr = shmat(shmid, NULL, 0); if (shmaddr == (char *)-1) { perror("Shared memory attach failure"); shmctl(shmid, IPC_RMID, NULL); diff --git a/tools/testing/selftests/mm/hugepage-vmemmap.c b/tools/testing/selftests/mm/hugepage-vmemmap.c index 894d28c3dd47..df366a4d1b92 100644 --- a/tools/testing/selftests/mm/hugepage-vmemmap.c +++ b/tools/testing/selftests/mm/hugepage-vmemmap.c @@ -22,20 +22,6 @@ #define PM_PFRAME_BITS 55 #define PM_PFRAME_MASK ~((1UL << PM_PFRAME_BITS) - 1) -/* - * For ia64 architecture, Linux kernel reserves Region number 4 for hugepages. - * That means the addresses starting with 0x800000... will need to be - * specified. Specifying a fixed address is not required on ppc64, i386 - * or x86_64. - */ -#ifdef __ia64__ -#define MAP_ADDR (void *)(0x8000000000000000UL) -#define MAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_FIXED) -#else -#define MAP_ADDR NULL -#define MAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB) -#endif - static size_t pagesize; static size_t maplength; @@ -113,7 +99,8 @@ int main(int argc, char **argv) exit(1); } - addr = mmap(MAP_ADDR, maplength, PROT_READ | PROT_WRITE, MAP_FLAGS, -1, 0); + addr = mmap(NULL, maplength, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0); if (addr == MAP_FAILED) { perror("mmap"); exit(1); diff --git a/tools/testing/selftests/mm/khugepaged.c b/tools/testing/selftests/mm/khugepaged.c index 829320a519e7..56d4480e8d3c 100644 --- a/tools/testing/selftests/mm/khugepaged.c +++ b/tools/testing/selftests/mm/khugepaged.c @@ -1095,7 +1095,7 @@ static void usage(void) fprintf(stderr, "\n\tSupported Options:\n"); fprintf(stderr, "\t\t-h: This help message.\n"); fprintf(stderr, "\t\t-s: mTHP size, expressed as page order.\n"); - fprintf(stderr, "\t\t Defaults to 0. Use this size for anon allocations.\n"); + fprintf(stderr, "\t\t Defaults to 0. Use this size for anon or shmem allocations.\n"); exit(1); } @@ -1209,6 +1209,8 @@ int main(int argc, char **argv) default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8; default_settings.hugepages[hpage_pmd_order].enabled = THP_INHERIT; default_settings.hugepages[anon_order].enabled = THP_ALWAYS; + default_settings.shmem_hugepages[hpage_pmd_order].enabled = SHMEM_INHERIT; + default_settings.shmem_hugepages[anon_order].enabled = SHMEM_ALWAYS; save_settings(); thp_push_settings(&default_settings); diff --git a/tools/testing/selftests/mm/map_hugetlb.c b/tools/testing/selftests/mm/map_hugetlb.c index a1f005a90a4f..b47399feab53 100644 --- a/tools/testing/selftests/mm/map_hugetlb.c +++ b/tools/testing/selftests/mm/map_hugetlb.c @@ -4,11 +4,6 @@ * system call with MAP_HUGETLB flag. Before running this program make * sure the administrator has allocated enough default sized huge pages * to cover the 256 MB allocation. - * - * For ia64 architecture, Linux kernel reserves Region number 4 for hugepages. - * That means the addresses starting with 0x800000... will need to be - * specified. Specifying a fixed address is not required on ppc64, i386 - * or x86_64. */ #include <stdlib.h> #include <stdio.h> @@ -21,15 +16,6 @@ #define LENGTH (256UL*1024*1024) #define PROTECTION (PROT_READ | PROT_WRITE) -/* Only ia64 requires this */ -#ifdef __ia64__ -#define ADDR (void *)(0x8000000000000000UL) -#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_FIXED) -#else -#define ADDR (void *)(0x0UL) -#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB) -#endif - static void check_bytes(char *addr) { ksft_print_msg("First hex is %x\n", *((unsigned int *)addr)); @@ -60,7 +46,7 @@ int main(int argc, char **argv) void *addr; size_t hugepage_size; size_t length = LENGTH; - int flags = FLAGS; + int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB; int shift = 0; hugepage_size = default_huge_page_size(); @@ -85,7 +71,7 @@ int main(int argc, char **argv) ksft_print_msg("Default size hugepages\n"); ksft_print_msg("Mapping %lu Mbytes\n", (unsigned long)length >> 20); - addr = mmap(ADDR, length, PROTECTION, flags, -1, 0); + addr = mmap(NULL, length, PROTECTION, flags, -1, 0); if (addr == MAP_FAILED) ksft_exit_fail_msg("mmap: %s\n", strerror(errno)); diff --git a/tools/testing/selftests/mm/migration.c b/tools/testing/selftests/mm/migration.c index 6908569ef406..64bcbb7151cf 100644 --- a/tools/testing/selftests/mm/migration.c +++ b/tools/testing/selftests/mm/migration.c @@ -15,10 +15,10 @@ #include <signal.h> #include <time.h> -#define TWOMEG (2<<20) -#define RUNTIME (20) - -#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1))) +#define TWOMEG (2<<20) +#define RUNTIME (20) +#define MAX_RETRIES 100 +#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1))) FIXTURE(migration) { @@ -65,6 +65,7 @@ int migrate(uint64_t *ptr, int n1, int n2) int ret, tmp; int status = 0; struct timespec ts1, ts2; + int failures = 0; if (clock_gettime(CLOCK_MONOTONIC, &ts1)) return -1; @@ -79,13 +80,17 @@ int migrate(uint64_t *ptr, int n1, int n2) ret = move_pages(0, 1, (void **) &ptr, &n2, &status, MPOL_MF_MOVE_ALL); if (ret) { - if (ret > 0) + if (ret > 0) { + /* Migration is best effort; try again */ + if (++failures < MAX_RETRIES) + continue; printf("Didn't migrate %d pages\n", ret); + } else perror("Couldn't migrate pages"); return -2; } - + failures = 0; tmp = n2; n2 = n1; n1 = tmp; diff --git a/tools/testing/selftests/mm/mseal_test.c b/tools/testing/selftests/mm/mseal_test.c index bfcea5cf9a48..01675c412b2a 100644 --- a/tools/testing/selftests/mm/mseal_test.c +++ b/tools/testing/selftests/mm/mseal_test.c @@ -99,6 +99,16 @@ static int sys_madvise(void *start, size_t len, int types) return sret; } +static void *sys_mremap(void *addr, size_t old_len, size_t new_len, + unsigned long flags, void *new_addr) +{ + void *sret; + + errno = 0; + sret = (void *) syscall(__NR_mremap, addr, old_len, new_len, flags, new_addr); + return sret; +} + static int sys_pkey_alloc(unsigned long flags, unsigned long init_val) { int ret = syscall(__NR_pkey_alloc, flags, init_val); @@ -756,6 +766,42 @@ static void test_seal_mprotect_partial_mprotect(bool seal) REPORT_TEST_PASS(); } +static void test_seal_mprotect_partial_mprotect_tail(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 2 * page_size; + int ret; + int prot; + + /* + * Check if a partial mseal (that results in two vmas) works correctly. + * It might mprotect the first, but it'll never touch the second (msealed) vma. + */ + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = sys_mseal(ptr + page_size, page_size); + FAIL_TEST_IF_FALSE(!ret); + } + + ret = sys_mprotect(ptr, size, PROT_EXEC); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + if (seal) { + FAIL_TEST_IF_FALSE(get_vma_size(ptr + page_size, &prot) > 0); + FAIL_TEST_IF_FALSE(prot == 0x4); + } + + REPORT_TEST_PASS(); +} + + static void test_seal_mprotect_two_vma_with_gap(bool seal) { void *ptr; @@ -973,6 +1019,36 @@ static void test_seal_munmap_vma_with_gap(bool seal) REPORT_TEST_PASS(); } +static void test_seal_munmap_partial_across_vmas(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 2 * page_size; + int ret; + int prot; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = sys_mseal(ptr + page_size, page_size); + FAIL_TEST_IF_FALSE(!ret); + } + + ret = sys_munmap(ptr, size); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + if (seal) { + FAIL_TEST_IF_FALSE(get_vma_size(ptr + page_size, &prot) > 0); + FAIL_TEST_IF_FALSE(prot == 0x4); + } + + REPORT_TEST_PASS(); +} + static void test_munmap_start_freed(bool seal) { void *ptr; @@ -1104,12 +1180,12 @@ static void test_seal_mremap_shrink(bool seal) } /* shrink from 4 pages to 2 pages. */ - ret2 = mremap(ptr, size, 2 * page_size, 0, 0); + ret2 = sys_mremap(ptr, size, 2 * page_size, 0, 0); if (seal) { - FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); + FAIL_TEST_IF_FALSE(ret2 == (void *) MAP_FAILED); FAIL_TEST_IF_FALSE(errno == EPERM); } else { - FAIL_TEST_IF_FALSE(ret2 != MAP_FAILED); + FAIL_TEST_IF_FALSE(ret2 != (void *) MAP_FAILED); } @@ -1136,7 +1212,7 @@ static void test_seal_mremap_expand(bool seal) } /* expand from 2 page to 4 pages. */ - ret2 = mremap(ptr, 2 * page_size, 4 * page_size, 0, 0); + ret2 = sys_mremap(ptr, 2 * page_size, 4 * page_size, 0, 0); if (seal) { FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); FAIL_TEST_IF_FALSE(errno == EPERM); @@ -1169,7 +1245,7 @@ static void test_seal_mremap_move(bool seal) } /* move from ptr to fixed address. */ - ret2 = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, newPtr); + ret2 = sys_mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, newPtr); if (seal) { FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); FAIL_TEST_IF_FALSE(errno == EPERM); @@ -1288,7 +1364,7 @@ static void test_seal_mremap_shrink_fixed(bool seal) } /* mremap to move and shrink to fixed address */ - ret2 = mremap(ptr, size, 2 * page_size, MREMAP_MAYMOVE | MREMAP_FIXED, + ret2 = sys_mremap(ptr, size, 2 * page_size, MREMAP_MAYMOVE | MREMAP_FIXED, newAddr); if (seal) { FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); @@ -1319,7 +1395,7 @@ static void test_seal_mremap_expand_fixed(bool seal) } /* mremap to move and expand to fixed address */ - ret2 = mremap(ptr, page_size, size, MREMAP_MAYMOVE | MREMAP_FIXED, + ret2 = sys_mremap(ptr, page_size, size, MREMAP_MAYMOVE | MREMAP_FIXED, newAddr); if (seal) { FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); @@ -1350,7 +1426,7 @@ static void test_seal_mremap_move_fixed(bool seal) } /* mremap to move to fixed address */ - ret2 = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, newAddr); + ret2 = sys_mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, newAddr); if (seal) { FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); FAIL_TEST_IF_FALSE(errno == EPERM); @@ -1379,14 +1455,13 @@ static void test_seal_mremap_move_fixed_zero(bool seal) /* * MREMAP_FIXED can move the mapping to zero address */ - ret2 = mremap(ptr, size, 2 * page_size, MREMAP_MAYMOVE | MREMAP_FIXED, + ret2 = sys_mremap(ptr, size, 2 * page_size, MREMAP_MAYMOVE | MREMAP_FIXED, 0); if (seal) { FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); FAIL_TEST_IF_FALSE(errno == EPERM); } else { FAIL_TEST_IF_FALSE(ret2 == 0); - } REPORT_TEST_PASS(); @@ -1409,13 +1484,13 @@ static void test_seal_mremap_move_dontunmap(bool seal) } /* mremap to move, and don't unmap src addr. */ - ret2 = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_DONTUNMAP, 0); + ret2 = sys_mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_DONTUNMAP, 0); if (seal) { FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); FAIL_TEST_IF_FALSE(errno == EPERM); } else { + /* kernel will allocate a new address */ FAIL_TEST_IF_FALSE(ret2 != MAP_FAILED); - } REPORT_TEST_PASS(); @@ -1423,7 +1498,7 @@ static void test_seal_mremap_move_dontunmap(bool seal) static void test_seal_mremap_move_dontunmap_anyaddr(bool seal) { - void *ptr; + void *ptr, *ptr2; unsigned long page_size = getpagesize(); unsigned long size = 4 * page_size; int ret; @@ -1438,24 +1513,30 @@ static void test_seal_mremap_move_dontunmap_anyaddr(bool seal) } /* - * The 0xdeaddead should not have effect on dest addr - * when MREMAP_DONTUNMAP is set. + * The new address is any address that not allocated. + * use allocate/free to similate that. */ - ret2 = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_DONTUNMAP, - 0xdeaddead); + setup_single_address(size, &ptr2); + FAIL_TEST_IF_FALSE(ptr2 != (void *)-1); + ret = sys_munmap(ptr2, size); + FAIL_TEST_IF_FALSE(!ret); + + /* + * remap to any address. + */ + ret2 = sys_mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_DONTUNMAP, + (void *) ptr2); if (seal) { FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); FAIL_TEST_IF_FALSE(errno == EPERM); } else { - FAIL_TEST_IF_FALSE(ret2 != MAP_FAILED); - FAIL_TEST_IF_FALSE((long)ret2 != 0xdeaddead); - + /* remap success and return ptr2 */ + FAIL_TEST_IF_FALSE(ret2 == ptr2); } REPORT_TEST_PASS(); } - static void test_seal_merge_and_split(void) { void *ptr; @@ -1720,6 +1801,69 @@ static void test_seal_discard_ro_anon(bool seal) REPORT_TEST_PASS(); } +static void test_seal_discard_across_vmas(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 2 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = seal_single_address(ptr + page_size, page_size); + FAIL_TEST_IF_FALSE(!ret); + } + + ret = sys_madvise(ptr, size, MADV_DONTNEED); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + ret = sys_munmap(ptr, size); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + REPORT_TEST_PASS(); +} + + +static void test_seal_madvise_nodiscard(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = seal_single_address(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* + * Test a random madvise flag like MADV_RANDOM that does not touch page + * contents (and thus should work for msealed VMAs). RANDOM also happens to + * share bits with other discard-ish flags like REMOVE. + */ + ret = sys_madvise(ptr, size, MADV_RANDOM); + FAIL_TEST_IF_FALSE(!ret); + + ret = sys_munmap(ptr, size); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + REPORT_TEST_PASS(); +} + int main(int argc, char **argv) { bool test_seal = seal_support(); @@ -1732,7 +1876,7 @@ int main(int argc, char **argv) if (!pkey_supported()) ksft_print_msg("PKEY not supported\n"); - ksft_set_plan(80); + ksft_set_plan(88); test_seal_addseal(); test_seal_unmapped_start(); @@ -1778,12 +1922,17 @@ int main(int argc, char **argv) test_seal_mprotect_split(false); test_seal_mprotect_split(true); + test_seal_mprotect_partial_mprotect_tail(false); + test_seal_mprotect_partial_mprotect_tail(true); + test_seal_munmap(false); test_seal_munmap(true); test_seal_munmap_two_vma(false); test_seal_munmap_two_vma(true); test_seal_munmap_vma_with_gap(false); test_seal_munmap_vma_with_gap(true); + test_seal_munmap_partial_across_vmas(false); + test_seal_munmap_partial_across_vmas(true); test_munmap_start_freed(false); test_munmap_start_freed(true); @@ -1811,8 +1960,12 @@ int main(int argc, char **argv) test_seal_mremap_move_fixed_zero(true); test_seal_mremap_move_dontunmap_anyaddr(false); test_seal_mremap_move_dontunmap_anyaddr(true); + test_seal_madvise_nodiscard(false); + test_seal_madvise_nodiscard(true); test_seal_discard_ro_anon(false); test_seal_discard_ro_anon(true); + test_seal_discard_across_vmas(false); + test_seal_discard_across_vmas(true); test_seal_discard_ro_anon_on_rw(false); test_seal_discard_ro_anon_on_rw(true); test_seal_discard_ro_anon_on_shared(false); diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c index fc90af2a97b8..bcc73b4e805c 100644 --- a/tools/testing/selftests/mm/pagemap_ioctl.c +++ b/tools/testing/selftests/mm/pagemap_ioctl.c @@ -15,7 +15,7 @@ #include <sys/ioctl.h> #include <sys/stat.h> #include <math.h> -#include <asm-generic/unistd.h> +#include <asm/unistd.h> #include <pthread.h> #include <sys/resource.h> #include <assert.h> diff --git a/tools/testing/selftests/mm/pkey-arm64.h b/tools/testing/selftests/mm/pkey-arm64.h new file mode 100644 index 000000000000..580e1b0bb38e --- /dev/null +++ b/tools/testing/selftests/mm/pkey-arm64.h @@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Arm Ltd. + */ + +#ifndef _PKEYS_ARM64_H +#define _PKEYS_ARM64_H + +#include "vm_util.h" +/* for signal frame parsing */ +#include "../arm64/signal/testcases/testcases.h" + +#ifndef SYS_mprotect_key +# define SYS_mprotect_key 288 +#endif +#ifndef SYS_pkey_alloc +# define SYS_pkey_alloc 289 +# define SYS_pkey_free 290 +#endif +#define MCONTEXT_IP(mc) mc.pc +#define MCONTEXT_TRAPNO(mc) -1 + +#define PKEY_MASK 0xf + +#define POE_NONE 0x0 +#define POE_X 0x2 +#define POE_RX 0x3 +#define POE_RWX 0x7 + +#define NR_PKEYS 8 +#define NR_RESERVED_PKEYS 1 /* pkey-0 */ + +#define PKEY_ALLOW_ALL 0x77777777 + +#define PKEY_BITS_PER_PKEY 4 +#define PAGE_SIZE sysconf(_SC_PAGESIZE) +#undef HPAGE_SIZE +#define HPAGE_SIZE default_huge_page_size() + +/* 4-byte instructions * 16384 = 64K page */ +#define __page_o_noops() asm(".rept 16384 ; nop; .endr") + +static inline u64 __read_pkey_reg(void) +{ + u64 pkey_reg = 0; + + // POR_EL0 + asm volatile("mrs %0, S3_3_c10_c2_4" : "=r" (pkey_reg)); + + return pkey_reg; +} + +static inline void __write_pkey_reg(u64 pkey_reg) +{ + u64 por = pkey_reg; + + dprintf4("%s() changing %016llx to %016llx\n", + __func__, __read_pkey_reg(), pkey_reg); + + // POR_EL0 + asm volatile("msr S3_3_c10_c2_4, %0\nisb" :: "r" (por) :); + + dprintf4("%s() pkey register after changing %016llx to %016llx\n", + __func__, __read_pkey_reg(), pkey_reg); +} + +static inline int cpu_has_pkeys(void) +{ + /* No simple way to determine this */ + return 1; +} + +static inline u32 pkey_bit_position(int pkey) +{ + return pkey * PKEY_BITS_PER_PKEY; +} + +static inline int get_arch_reserved_keys(void) +{ + return NR_RESERVED_PKEYS; +} + +void expect_fault_on_read_execonly_key(void *p1, int pkey) +{ +} + +void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey) +{ + return PTR_ERR_ENOTSUP; +} + +#define set_pkey_bits set_pkey_bits +static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags) +{ + u32 shift = pkey_bit_position(pkey); + u64 new_val = POE_RWX; + + /* mask out bits from pkey in old value */ + reg &= ~((u64)PKEY_MASK << shift); + + if (flags & PKEY_DISABLE_ACCESS) + new_val = POE_X; + else if (flags & PKEY_DISABLE_WRITE) + new_val = POE_RX; + + /* OR in new bits for pkey */ + reg |= new_val << shift; + + return reg; +} + +#define get_pkey_bits get_pkey_bits +static inline u64 get_pkey_bits(u64 reg, int pkey) +{ + u32 shift = pkey_bit_position(pkey); + /* + * shift down the relevant bits to the lowest four, then + * mask off all the other higher bits + */ + u32 perm = (reg >> shift) & PKEY_MASK; + + if (perm == POE_X) + return PKEY_DISABLE_ACCESS; + if (perm == POE_RX) + return PKEY_DISABLE_WRITE; + return 0; +} + +static void aarch64_write_signal_pkey(ucontext_t *uctxt, u64 pkey) +{ + struct _aarch64_ctx *ctx = GET_UC_RESV_HEAD(uctxt); + struct poe_context *poe_ctx = + (struct poe_context *) get_header(ctx, POE_MAGIC, + sizeof(uctxt->uc_mcontext), NULL); + if (poe_ctx) + poe_ctx->por_el0 = pkey; +} + +#endif /* _PKEYS_ARM64_H */ diff --git a/tools/testing/selftests/mm/pkey-helpers.h b/tools/testing/selftests/mm/pkey-helpers.h index 1af3156a9db8..9ab6a3ee153b 100644 --- a/tools/testing/selftests/mm/pkey-helpers.h +++ b/tools/testing/selftests/mm/pkey-helpers.h @@ -79,7 +79,18 @@ extern void abort_hooks(void); } \ } while (0) -__attribute__((noinline)) int read_ptr(int *ptr); +#define barrier() __asm__ __volatile__("": : :"memory") +#ifndef noinline +# define noinline __attribute__((noinline)) +#endif + +noinline int read_ptr(int *ptr) +{ + /* Keep GCC from optimizing this away somehow */ + barrier(); + return *ptr; +} + void expected_pkey_fault(int pkey); int sys_pkey_alloc(unsigned long flags, unsigned long init_val); int sys_pkey_free(unsigned long pkey); @@ -91,12 +102,17 @@ void record_pkey_malloc(void *ptr, long size, int prot); #include "pkey-x86.h" #elif defined(__powerpc64__) /* arch */ #include "pkey-powerpc.h" +#elif defined(__aarch64__) /* arch */ +#include "pkey-arm64.h" #else /* arch */ #error Architecture not supported #endif /* arch */ +#ifndef PKEY_MASK #define PKEY_MASK (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE) +#endif +#ifndef set_pkey_bits static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags) { u32 shift = pkey_bit_position(pkey); @@ -106,7 +122,9 @@ static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags) reg |= (flags & PKEY_MASK) << shift; return reg; } +#endif +#ifndef get_pkey_bits static inline u64 get_pkey_bits(u64 reg, int pkey) { u32 shift = pkey_bit_position(pkey); @@ -116,6 +134,7 @@ static inline u64 get_pkey_bits(u64 reg, int pkey) */ return ((reg >> shift) & PKEY_MASK); } +#endif extern u64 shadow_pkey_reg; diff --git a/tools/testing/selftests/mm/pkey-powerpc.h b/tools/testing/selftests/mm/pkey-powerpc.h index ae5df26104e5..3d0c0bdae5bc 100644 --- a/tools/testing/selftests/mm/pkey-powerpc.h +++ b/tools/testing/selftests/mm/pkey-powerpc.h @@ -8,7 +8,10 @@ # define SYS_pkey_free 385 #endif #define REG_IP_IDX PT_NIP +#define MCONTEXT_IP(mc) mc.gp_regs[REG_IP_IDX] +#define MCONTEXT_TRAPNO(mc) mc.gp_regs[REG_TRAPNO] #define REG_TRAPNO PT_TRAP +#define MCONTEXT_FPREGS #define gregs gp_regs #define fpregs fp_regs #define si_pkey_offset 0x20 diff --git a/tools/testing/selftests/mm/pkey-x86.h b/tools/testing/selftests/mm/pkey-x86.h index 814758e109c0..5f28e26a2511 100644 --- a/tools/testing/selftests/mm/pkey-x86.h +++ b/tools/testing/selftests/mm/pkey-x86.h @@ -15,6 +15,10 @@ #endif +#define MCONTEXT_IP(mc) mc.gregs[REG_IP_IDX] +#define MCONTEXT_TRAPNO(mc) mc.gregs[REG_TRAPNO] +#define MCONTEXT_FPREGS + #ifndef PKEY_DISABLE_ACCESS # define PKEY_DISABLE_ACCESS 0x1 #endif diff --git a/tools/testing/selftests/mm/pkey_sighandler_tests.c b/tools/testing/selftests/mm/pkey_sighandler_tests.c new file mode 100644 index 000000000000..a8088b645ad6 --- /dev/null +++ b/tools/testing/selftests/mm/pkey_sighandler_tests.c @@ -0,0 +1,481 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Tests Memory Protection Keys (see Documentation/core-api/protection-keys.rst) + * + * The testcases in this file exercise various flows related to signal handling, + * using an alternate signal stack, with the default pkey (pkey 0) disabled. + * + * Compile with: + * gcc -mxsave -o pkey_sighandler_tests -O2 -g -std=gnu99 -pthread -Wall pkey_sighandler_tests.c -I../../../../tools/include -lrt -ldl -lm + * gcc -mxsave -m32 -o pkey_sighandler_tests -O2 -g -std=gnu99 -pthread -Wall pkey_sighandler_tests.c -I../../../../tools/include -lrt -ldl -lm + */ +#define _GNU_SOURCE +#define __SANE_USERSPACE_TYPES__ +#include <errno.h> +#include <sys/syscall.h> +#include <string.h> +#include <stdio.h> +#include <stdint.h> +#include <stdbool.h> +#include <signal.h> +#include <assert.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <pthread.h> +#include <limits.h> + +#include "pkey-helpers.h" + +#define STACK_SIZE PTHREAD_STACK_MIN + +void expected_pkey_fault(int pkey) {} + +pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; +pthread_cond_t cond = PTHREAD_COND_INITIALIZER; +siginfo_t siginfo = {0}; + +/* + * We need to use inline assembly instead of glibc's syscall because glibc's + * syscall will attempt to access the PLT in order to call a library function + * which is protected by MPK 0 which we don't have access to. + */ +static inline __always_inline +long syscall_raw(long n, long a1, long a2, long a3, long a4, long a5, long a6) +{ + unsigned long ret; +#ifdef __x86_64__ + register long r10 asm("r10") = a4; + register long r8 asm("r8") = a5; + register long r9 asm("r9") = a6; + asm volatile ("syscall" + : "=a"(ret) + : "a"(n), "D"(a1), "S"(a2), "d"(a3), "r"(r10), "r"(r8), "r"(r9) + : "rcx", "r11", "memory"); +#elif defined __i386__ + asm volatile ("int $0x80" + : "=a"(ret) + : "a"(n), "b"(a1), "c"(a2), "d"(a3), "S"(a4), "D"(a5) + : "memory"); +#else +# error syscall_raw() not implemented +#endif + return ret; +} + +static void sigsegv_handler(int signo, siginfo_t *info, void *ucontext) +{ + pthread_mutex_lock(&mutex); + + memcpy(&siginfo, info, sizeof(siginfo_t)); + + pthread_cond_signal(&cond); + pthread_mutex_unlock(&mutex); + + syscall_raw(SYS_exit, 0, 0, 0, 0, 0, 0); +} + +static void sigusr1_handler(int signo, siginfo_t *info, void *ucontext) +{ + pthread_mutex_lock(&mutex); + + memcpy(&siginfo, info, sizeof(siginfo_t)); + + pthread_cond_signal(&cond); + pthread_mutex_unlock(&mutex); +} + +static void sigusr2_handler(int signo, siginfo_t *info, void *ucontext) +{ + /* + * pkru should be the init_pkru value which enabled MPK 0 so + * we can use library functions. + */ + printf("%s invoked.\n", __func__); +} + +static void raise_sigusr2(void) +{ + pid_t tid = 0; + + tid = syscall_raw(SYS_gettid, 0, 0, 0, 0, 0, 0); + + syscall_raw(SYS_tkill, tid, SIGUSR2, 0, 0, 0, 0); + + /* + * We should return from the signal handler here and be able to + * return to the interrupted thread. + */ +} + +static void *thread_segv_with_pkey0_disabled(void *ptr) +{ + /* Disable MPK 0 (and all others too) */ + __write_pkey_reg(0x55555555); + + /* Segfault (with SEGV_MAPERR) */ + *(int *) (0x1) = 1; + return NULL; +} + +static void *thread_segv_pkuerr_stack(void *ptr) +{ + /* Disable MPK 0 (and all others too) */ + __write_pkey_reg(0x55555555); + + /* After we disable MPK 0, we can't access the stack to return */ + return NULL; +} + +static void *thread_segv_maperr_ptr(void *ptr) +{ + stack_t *stack = ptr; + int *bad = (int *)1; + + /* + * Setup alternate signal stack, which should be pkey_mprotect()ed by + * MPK 0. The thread's stack cannot be used for signals because it is + * not accessible by the default init_pkru value of 0x55555554. + */ + syscall_raw(SYS_sigaltstack, (long)stack, 0, 0, 0, 0, 0); + + /* Disable MPK 0. Only MPK 1 is enabled. */ + __write_pkey_reg(0x55555551); + + /* Segfault */ + *bad = 1; + syscall_raw(SYS_exit, 0, 0, 0, 0, 0, 0); + return NULL; +} + +/* + * Verify that the sigsegv handler is invoked when pkey 0 is disabled. + * Note that the new thread stack and the alternate signal stack is + * protected by MPK 0. + */ +static void test_sigsegv_handler_with_pkey0_disabled(void) +{ + struct sigaction sa; + pthread_attr_t attr; + pthread_t thr; + + sa.sa_flags = SA_SIGINFO; + + sa.sa_sigaction = sigsegv_handler; + sigemptyset(&sa.sa_mask); + if (sigaction(SIGSEGV, &sa, NULL) == -1) { + perror("sigaction"); + exit(EXIT_FAILURE); + } + + memset(&siginfo, 0, sizeof(siginfo)); + + pthread_attr_init(&attr); + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); + + pthread_create(&thr, &attr, thread_segv_with_pkey0_disabled, NULL); + + pthread_mutex_lock(&mutex); + while (siginfo.si_signo == 0) + pthread_cond_wait(&cond, &mutex); + pthread_mutex_unlock(&mutex); + + ksft_test_result(siginfo.si_signo == SIGSEGV && + siginfo.si_code == SEGV_MAPERR && + siginfo.si_addr == (void *)1, + "%s\n", __func__); +} + +/* + * Verify that the sigsegv handler is invoked when pkey 0 is disabled. + * Note that the new thread stack and the alternate signal stack is + * protected by MPK 0, which renders them inaccessible when MPK 0 + * is disabled. So just the return from the thread should cause a + * segfault with SEGV_PKUERR. + */ +static void test_sigsegv_handler_cannot_access_stack(void) +{ + struct sigaction sa; + pthread_attr_t attr; + pthread_t thr; + + sa.sa_flags = SA_SIGINFO; + + sa.sa_sigaction = sigsegv_handler; + sigemptyset(&sa.sa_mask); + if (sigaction(SIGSEGV, &sa, NULL) == -1) { + perror("sigaction"); + exit(EXIT_FAILURE); + } + + memset(&siginfo, 0, sizeof(siginfo)); + + pthread_attr_init(&attr); + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); + + pthread_create(&thr, &attr, thread_segv_pkuerr_stack, NULL); + + pthread_mutex_lock(&mutex); + while (siginfo.si_signo == 0) + pthread_cond_wait(&cond, &mutex); + pthread_mutex_unlock(&mutex); + + ksft_test_result(siginfo.si_signo == SIGSEGV && + siginfo.si_code == SEGV_PKUERR, + "%s\n", __func__); +} + +/* + * Verify that the sigsegv handler that uses an alternate signal stack + * is correctly invoked for a thread which uses a non-zero MPK to protect + * its own stack, and disables all other MPKs (including 0). + */ +static void test_sigsegv_handler_with_different_pkey_for_stack(void) +{ + struct sigaction sa; + static stack_t sigstack; + void *stack; + int pkey; + int parent_pid = 0; + int child_pid = 0; + + sa.sa_flags = SA_SIGINFO | SA_ONSTACK; + + sa.sa_sigaction = sigsegv_handler; + + sigemptyset(&sa.sa_mask); + if (sigaction(SIGSEGV, &sa, NULL) == -1) { + perror("sigaction"); + exit(EXIT_FAILURE); + } + + stack = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + assert(stack != MAP_FAILED); + + /* Allow access to MPK 0 and MPK 1 */ + __write_pkey_reg(0x55555550); + + /* Protect the new stack with MPK 1 */ + pkey = pkey_alloc(0, 0); + pkey_mprotect(stack, STACK_SIZE, PROT_READ | PROT_WRITE, pkey); + + /* Set up alternate signal stack that will use the default MPK */ + sigstack.ss_sp = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + sigstack.ss_flags = 0; + sigstack.ss_size = STACK_SIZE; + + memset(&siginfo, 0, sizeof(siginfo)); + + /* Use clone to avoid newer glibcs using rseq on new threads */ + long ret = syscall_raw(SYS_clone, + CLONE_VM | CLONE_FS | CLONE_FILES | + CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM | + CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID | + CLONE_DETACHED, + (long) ((char *)(stack) + STACK_SIZE), + (long) &parent_pid, + (long) &child_pid, 0, 0); + + if (ret < 0) { + errno = -ret; + perror("clone"); + } else if (ret == 0) { + thread_segv_maperr_ptr(&sigstack); + syscall_raw(SYS_exit, 0, 0, 0, 0, 0, 0); + } + + pthread_mutex_lock(&mutex); + while (siginfo.si_signo == 0) + pthread_cond_wait(&cond, &mutex); + pthread_mutex_unlock(&mutex); + + ksft_test_result(siginfo.si_signo == SIGSEGV && + siginfo.si_code == SEGV_MAPERR && + siginfo.si_addr == (void *)1, + "%s\n", __func__); +} + +/* + * Verify that the PKRU value set by the application is correctly + * restored upon return from signal handling. + */ +static void test_pkru_preserved_after_sigusr1(void) +{ + struct sigaction sa; + unsigned long pkru = 0x45454544; + + sa.sa_flags = SA_SIGINFO; + + sa.sa_sigaction = sigusr1_handler; + sigemptyset(&sa.sa_mask); + if (sigaction(SIGUSR1, &sa, NULL) == -1) { + perror("sigaction"); + exit(EXIT_FAILURE); + } + + memset(&siginfo, 0, sizeof(siginfo)); + + __write_pkey_reg(pkru); + + raise(SIGUSR1); + + pthread_mutex_lock(&mutex); + while (siginfo.si_signo == 0) + pthread_cond_wait(&cond, &mutex); + pthread_mutex_unlock(&mutex); + + /* Ensure the pkru value is the same after returning from signal. */ + ksft_test_result(pkru == __read_pkey_reg() && + siginfo.si_signo == SIGUSR1, + "%s\n", __func__); +} + +static noinline void *thread_sigusr2_self(void *ptr) +{ + /* + * A const char array like "Resuming after SIGUSR2" won't be stored on + * the stack and the code could access it via an offset from the program + * counter. This makes sure it's on the function's stack frame. + */ + char str[] = {'R', 'e', 's', 'u', 'm', 'i', 'n', 'g', ' ', + 'a', 'f', 't', 'e', 'r', ' ', + 'S', 'I', 'G', 'U', 'S', 'R', '2', + '.', '.', '.', '\n', '\0'}; + stack_t *stack = ptr; + + /* + * Setup alternate signal stack, which should be pkey_mprotect()ed by + * MPK 0. The thread's stack cannot be used for signals because it is + * not accessible by the default init_pkru value of 0x55555554. + */ + syscall(SYS_sigaltstack, (long)stack, 0, 0, 0, 0, 0); + + /* Disable MPK 0. Only MPK 2 is enabled. */ + __write_pkey_reg(0x55555545); + + raise_sigusr2(); + + /* Do something, to show the thread resumed execution after the signal */ + syscall_raw(SYS_write, 1, (long) str, sizeof(str) - 1, 0, 0, 0); + + /* + * We can't return to test_pkru_sigreturn because it + * will attempt to use a %rbp value which is on the stack + * of the main thread. + */ + syscall_raw(SYS_exit, 0, 0, 0, 0, 0, 0); + return NULL; +} + +/* + * Verify that sigreturn is able to restore altstack even if the thread had + * disabled pkey 0. + */ +static void test_pkru_sigreturn(void) +{ + struct sigaction sa = {0}; + static stack_t sigstack; + void *stack; + int pkey; + int parent_pid = 0; + int child_pid = 0; + + sa.sa_handler = SIG_DFL; + sa.sa_flags = 0; + sigemptyset(&sa.sa_mask); + + /* + * For this testcase, we do not want to handle SIGSEGV. Reset handler + * to default so that the application can crash if it receives SIGSEGV. + */ + if (sigaction(SIGSEGV, &sa, NULL) == -1) { + perror("sigaction"); + exit(EXIT_FAILURE); + } + + sa.sa_flags = SA_SIGINFO | SA_ONSTACK; + sa.sa_sigaction = sigusr2_handler; + sigemptyset(&sa.sa_mask); + + if (sigaction(SIGUSR2, &sa, NULL) == -1) { + perror("sigaction"); + exit(EXIT_FAILURE); + } + + stack = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + assert(stack != MAP_FAILED); + + /* + * Allow access to MPK 0 and MPK 2. The child thread (to be created + * later in this flow) will have its stack protected by MPK 2, whereas + * the current thread's stack is protected by the default MPK 0. Hence + * both need to be enabled. + */ + __write_pkey_reg(0x55555544); + + /* Protect the stack with MPK 2 */ + pkey = pkey_alloc(0, 0); + pkey_mprotect(stack, STACK_SIZE, PROT_READ | PROT_WRITE, pkey); + + /* Set up alternate signal stack that will use the default MPK */ + sigstack.ss_sp = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + sigstack.ss_flags = 0; + sigstack.ss_size = STACK_SIZE; + + /* Use clone to avoid newer glibcs using rseq on new threads */ + long ret = syscall_raw(SYS_clone, + CLONE_VM | CLONE_FS | CLONE_FILES | + CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM | + CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID | + CLONE_DETACHED, + (long) ((char *)(stack) + STACK_SIZE), + (long) &parent_pid, + (long) &child_pid, 0, 0); + + if (ret < 0) { + errno = -ret; + perror("clone"); + } else if (ret == 0) { + thread_sigusr2_self(&sigstack); + syscall_raw(SYS_exit, 0, 0, 0, 0, 0, 0); + } + + child_pid = ret; + /* Check that thread exited */ + do { + sched_yield(); + ret = syscall_raw(SYS_tkill, child_pid, 0, 0, 0, 0, 0); + } while (ret != -ESRCH && ret != -EINVAL); + + ksft_test_result_pass("%s\n", __func__); +} + +static void (*pkey_tests[])(void) = { + test_sigsegv_handler_with_pkey0_disabled, + test_sigsegv_handler_cannot_access_stack, + test_sigsegv_handler_with_different_pkey_for_stack, + test_pkru_preserved_after_sigusr1, + test_pkru_sigreturn +}; + +int main(int argc, char *argv[]) +{ + int i; + + ksft_print_header(); + ksft_set_plan(ARRAY_SIZE(pkey_tests)); + + for (i = 0; i < ARRAY_SIZE(pkey_tests); i++) + (*pkey_tests[i])(); + + ksft_finished(); + return 0; +} diff --git a/tools/testing/selftests/mm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c index eaa6d1fc5328..4990f7ab4cb7 100644 --- a/tools/testing/selftests/mm/protection_keys.c +++ b/tools/testing/selftests/mm/protection_keys.c @@ -147,7 +147,7 @@ void abort_hooks(void) * will then fault, which makes sure that the fault code handles * execute-only memory properly. */ -#ifdef __powerpc64__ +#if defined(__powerpc64__) || defined(__aarch64__) /* This way, both 4K and 64K alignment are maintained */ __attribute__((__aligned__(65536))) #else @@ -212,7 +212,6 @@ void pkey_disable_set(int pkey, int flags) unsigned long syscall_flags = 0; int ret; int pkey_rights; - u64 orig_pkey_reg = read_pkey_reg(); dprintf1("START->%s(%d, 0x%x)\n", __func__, pkey, flags); @@ -242,8 +241,6 @@ void pkey_disable_set(int pkey, int flags) dprintf1("%s(%d) pkey_reg: 0x%016llx\n", __func__, pkey, read_pkey_reg()); - if (flags) - pkey_assert(read_pkey_reg() >= orig_pkey_reg); dprintf1("END<---%s(%d, 0x%x)\n", __func__, pkey, flags); } @@ -253,7 +250,6 @@ void pkey_disable_clear(int pkey, int flags) unsigned long syscall_flags = 0; int ret; int pkey_rights = hw_pkey_get(pkey, syscall_flags); - u64 orig_pkey_reg = read_pkey_reg(); pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); @@ -273,8 +269,6 @@ void pkey_disable_clear(int pkey, int flags) dprintf1("%s(%d) pkey_reg: 0x%016llx\n", __func__, pkey, read_pkey_reg()); - if (flags) - assert(read_pkey_reg() <= orig_pkey_reg); } void pkey_write_allow(int pkey) @@ -314,7 +308,9 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) ucontext_t *uctxt = vucontext; int trapno; unsigned long ip; +#ifdef MCONTEXT_FPREGS char *fpregs; +#endif #if defined(__i386__) || defined(__x86_64__) /* arch */ u32 *pkey_reg_ptr; int pkey_reg_offset; @@ -328,9 +324,11 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) __func__, __LINE__, __read_pkey_reg(), shadow_pkey_reg); - trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO]; - ip = uctxt->uc_mcontext.gregs[REG_IP_IDX]; + trapno = MCONTEXT_TRAPNO(uctxt->uc_mcontext); + ip = MCONTEXT_IP(uctxt->uc_mcontext); +#ifdef MCONTEXT_FPREGS fpregs = (char *) uctxt->uc_mcontext.fpregs; +#endif dprintf2("%s() trapno: %d ip: 0x%016lx info->si_code: %s/%d\n", __func__, trapno, ip, si_code_str(si->si_code), @@ -359,7 +357,9 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) #endif /* arch */ dprintf1("siginfo: %p\n", si); +#ifdef MCONTEXT_FPREGS dprintf1(" fpregs: %p\n", fpregs); +#endif if ((si->si_code == SEGV_MAPERR) || (si->si_code == SEGV_ACCERR) || @@ -389,6 +389,8 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) #elif defined(__powerpc64__) /* arch */ /* restore access and let the faulting instruction continue */ pkey_access_allow(siginfo_pkey); +#elif defined(__aarch64__) + aarch64_write_signal_pkey(uctxt, PKEY_ALLOW_ALL); #endif /* arch */ pkey_faults++; dprintf1("<<<<==================================================\n"); @@ -902,7 +904,9 @@ void expected_pkey_fault(int pkey) * test program continue. We now have to restore it. */ if (__read_pkey_reg() != 0) -#else /* arch */ +#elif defined(__aarch64__) + if (__read_pkey_reg() != PKEY_ALLOW_ALL) +#else if (__read_pkey_reg() != shadow_pkey_reg) #endif /* arch */ pkey_assert(0); @@ -950,16 +954,6 @@ void close_test_fds(void) nr_test_fds = 0; } -#define barrier() __asm__ __volatile__("": : :"memory") -__attribute__((noinline)) int read_ptr(int *ptr) -{ - /* - * Keep GCC from optimizing this away somehow - */ - barrier(); - return *ptr; -} - void test_pkey_alloc_free_attach_pkey0(int *ptr, u16 pkey) { int i, err; @@ -1492,6 +1486,11 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey) lots_o_noops_around_write(&scratch); do_not_expect_pkey_fault("executing on PROT_EXEC memory"); expect_fault_on_read_execonly_key(p1, pkey); + + // Reset back to PROT_EXEC | PROT_READ for architectures that support + // non-PKEY execute-only permissions. + ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC | PROT_READ, (u64)pkey); + pkey_assert(!ret); } void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) @@ -1665,6 +1664,84 @@ void test_ptrace_modifies_pkru(int *ptr, u16 pkey) } #endif +#if defined(__aarch64__) +void test_ptrace_modifies_pkru(int *ptr, u16 pkey) +{ + pid_t child; + int status, ret; + struct iovec iov; + u64 trace_pkey; + /* Just a random pkey value.. */ + u64 new_pkey = (POE_X << PKEY_BITS_PER_PKEY * 2) | + (POE_NONE << PKEY_BITS_PER_PKEY) | + POE_RWX; + + child = fork(); + pkey_assert(child >= 0); + dprintf3("[%d] fork() ret: %d\n", getpid(), child); + if (!child) { + ptrace(PTRACE_TRACEME, 0, 0, 0); + + /* Stop and allow the tracer to modify PKRU directly */ + raise(SIGSTOP); + + /* + * need __read_pkey_reg() version so we do not do shadow_pkey_reg + * checking + */ + if (__read_pkey_reg() != new_pkey) + exit(1); + + raise(SIGSTOP); + + exit(0); + } + + pkey_assert(child == waitpid(child, &status, 0)); + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); + pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); + + iov.iov_base = &trace_pkey; + iov.iov_len = 8; + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_ARM_POE, &iov); + pkey_assert(ret == 0); + pkey_assert(trace_pkey == read_pkey_reg()); + + trace_pkey = new_pkey; + + ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_ARM_POE, &iov); + pkey_assert(ret == 0); + + /* Test that the modification is visible in ptrace before any execution */ + memset(&trace_pkey, 0, sizeof(trace_pkey)); + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_ARM_POE, &iov); + pkey_assert(ret == 0); + pkey_assert(trace_pkey == new_pkey); + + /* Execute the tracee */ + ret = ptrace(PTRACE_CONT, child, 0, 0); + pkey_assert(ret == 0); + + /* Test that the tracee saw the PKRU value change */ + pkey_assert(child == waitpid(child, &status, 0)); + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); + pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); + + /* Test that the modification is visible in ptrace after execution */ + memset(&trace_pkey, 0, sizeof(trace_pkey)); + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_ARM_POE, &iov); + pkey_assert(ret == 0); + pkey_assert(trace_pkey == new_pkey); + + ret = ptrace(PTRACE_CONT, child, 0, 0); + pkey_assert(ret == 0); + pkey_assert(child == waitpid(child, &status, 0)); + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); + pkey_assert(WIFEXITED(status)); + pkey_assert(WEXITSTATUS(status) == 0); +} +#endif + void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) { int size = PAGE_SIZE; @@ -1700,7 +1777,7 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = { test_pkey_syscalls_bad_args, test_pkey_alloc_exhaust, test_pkey_alloc_free_attach_pkey0, -#if defined(__i386__) || defined(__x86_64__) +#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) test_ptrace_modifies_pkru, #endif }; diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 36045edb10de..c5797ad1d37b 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -189,7 +189,7 @@ else fi # filter 64bit architectures -ARCH64STR="arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sparc64 x86_64" +ARCH64STR="arm64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sparc64 x86_64" if [ -z "$ARCH" ]; then ARCH=$(uname -m 2>/dev/null | sed -e 's/aarch64.*/arm64/') fi diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c index e5e8dafc9d94..eb6d1b9fc362 100644 --- a/tools/testing/selftests/mm/split_huge_page_test.c +++ b/tools/testing/selftests/mm/split_huge_page_test.c @@ -84,6 +84,76 @@ static void write_debugfs(const char *fmt, ...) write_file(SPLIT_DEBUGFS, input, ret + 1); } +static char *allocate_zero_filled_hugepage(size_t len) +{ + char *result; + size_t i; + + result = memalign(pmd_pagesize, len); + if (!result) { + printf("Fail to allocate memory\n"); + exit(EXIT_FAILURE); + } + + madvise(result, len, MADV_HUGEPAGE); + + for (i = 0; i < len; i++) + result[i] = (char)0; + + return result; +} + +static void verify_rss_anon_split_huge_page_all_zeroes(char *one_page, int nr_hpages, size_t len) +{ + unsigned long rss_anon_before, rss_anon_after; + size_t i; + + if (!check_huge_anon(one_page, 4, pmd_pagesize)) { + printf("No THP is allocated\n"); + exit(EXIT_FAILURE); + } + + rss_anon_before = rss_anon(); + if (!rss_anon_before) { + printf("No RssAnon is allocated before split\n"); + exit(EXIT_FAILURE); + } + + /* split all THPs */ + write_debugfs(PID_FMT, getpid(), (uint64_t)one_page, + (uint64_t)one_page + len, 0); + + for (i = 0; i < len; i++) + if (one_page[i] != (char)0) { + printf("%ld byte corrupted\n", i); + exit(EXIT_FAILURE); + } + + if (!check_huge_anon(one_page, 0, pmd_pagesize)) { + printf("Still AnonHugePages not split\n"); + exit(EXIT_FAILURE); + } + + rss_anon_after = rss_anon(); + if (rss_anon_after >= rss_anon_before) { + printf("Incorrect RssAnon value. Before: %ld After: %ld\n", + rss_anon_before, rss_anon_after); + exit(EXIT_FAILURE); + } +} + +void split_pmd_zero_pages(void) +{ + char *one_page; + int nr_hpages = 4; + size_t len = nr_hpages * pmd_pagesize; + + one_page = allocate_zero_filled_hugepage(len); + verify_rss_anon_split_huge_page_all_zeroes(one_page, nr_hpages, len); + printf("Split zero filled huge pages successful\n"); + free(one_page); +} + void split_pmd_thp(void) { char *one_page; @@ -431,6 +501,7 @@ int main(int argc, char **argv) fd_size = 2 * pmd_pagesize; + split_pmd_zero_pages(); split_pmd_thp(); split_pte_mapped_thp(); split_file_backed_thp(); diff --git a/tools/testing/selftests/mm/thp_settings.c b/tools/testing/selftests/mm/thp_settings.c index a4163438108e..577eaab6266f 100644 --- a/tools/testing/selftests/mm/thp_settings.c +++ b/tools/testing/selftests/mm/thp_settings.c @@ -33,10 +33,11 @@ static const char * const thp_defrag_strings[] = { }; static const char * const shmem_enabled_strings[] = { + "never", "always", "within_size", "advise", - "never", + "inherit", "deny", "force", NULL @@ -200,6 +201,7 @@ void thp_write_num(const char *name, unsigned long num) void thp_read_settings(struct thp_settings *settings) { unsigned long orders = thp_supported_orders(); + unsigned long shmem_orders = thp_shmem_supported_orders(); char path[PATH_MAX]; int i; @@ -234,12 +236,24 @@ void thp_read_settings(struct thp_settings *settings) settings->hugepages[i].enabled = thp_read_string(path, thp_enabled_strings); } + + for (i = 0; i < NR_ORDERS; i++) { + if (!((1 << i) & shmem_orders)) { + settings->shmem_hugepages[i].enabled = SHMEM_NEVER; + continue; + } + snprintf(path, PATH_MAX, "hugepages-%ukB/shmem_enabled", + (getpagesize() >> 10) << i); + settings->shmem_hugepages[i].enabled = + thp_read_string(path, shmem_enabled_strings); + } } void thp_write_settings(struct thp_settings *settings) { struct khugepaged_settings *khugepaged = &settings->khugepaged; unsigned long orders = thp_supported_orders(); + unsigned long shmem_orders = thp_shmem_supported_orders(); char path[PATH_MAX]; int enabled; int i; @@ -271,6 +285,15 @@ void thp_write_settings(struct thp_settings *settings) enabled = settings->hugepages[i].enabled; thp_write_string(path, thp_enabled_strings[enabled]); } + + for (i = 0; i < NR_ORDERS; i++) { + if (!((1 << i) & shmem_orders)) + continue; + snprintf(path, PATH_MAX, "hugepages-%ukB/shmem_enabled", + (getpagesize() >> 10) << i); + enabled = settings->shmem_hugepages[i].enabled; + thp_write_string(path, shmem_enabled_strings[enabled]); + } } struct thp_settings *thp_current_settings(void) @@ -324,17 +347,18 @@ void thp_set_read_ahead_path(char *path) dev_queue_read_ahead_path[sizeof(dev_queue_read_ahead_path) - 1] = '\0'; } -unsigned long thp_supported_orders(void) +static unsigned long __thp_supported_orders(bool is_shmem) { unsigned long orders = 0; char path[PATH_MAX]; char buf[256]; - int ret; - int i; + int ret, i; + char anon_dir[] = "enabled"; + char shmem_dir[] = "shmem_enabled"; for (i = 0; i < NR_ORDERS; i++) { - ret = snprintf(path, PATH_MAX, THP_SYSFS "hugepages-%ukB/enabled", - (getpagesize() >> 10) << i); + ret = snprintf(path, PATH_MAX, THP_SYSFS "hugepages-%ukB/%s", + (getpagesize() >> 10) << i, is_shmem ? shmem_dir : anon_dir); if (ret >= PATH_MAX) { printf("%s: Pathname is too long\n", __func__); exit(EXIT_FAILURE); @@ -347,3 +371,13 @@ unsigned long thp_supported_orders(void) return orders; } + +unsigned long thp_supported_orders(void) +{ + return __thp_supported_orders(false); +} + +unsigned long thp_shmem_supported_orders(void) +{ + return __thp_supported_orders(true); +} diff --git a/tools/testing/selftests/mm/thp_settings.h b/tools/testing/selftests/mm/thp_settings.h index 71cbff05f4c7..876235a23460 100644 --- a/tools/testing/selftests/mm/thp_settings.h +++ b/tools/testing/selftests/mm/thp_settings.h @@ -22,10 +22,11 @@ enum thp_defrag { }; enum shmem_enabled { + SHMEM_NEVER, SHMEM_ALWAYS, SHMEM_WITHIN_SIZE, SHMEM_ADVISE, - SHMEM_NEVER, + SHMEM_INHERIT, SHMEM_DENY, SHMEM_FORCE, }; @@ -46,6 +47,10 @@ struct khugepaged_settings { unsigned long pages_to_scan; }; +struct shmem_hugepages_settings { + enum shmem_enabled enabled; +}; + struct thp_settings { enum thp_enabled thp_enabled; enum thp_defrag thp_defrag; @@ -54,6 +59,7 @@ struct thp_settings { struct khugepaged_settings khugepaged; unsigned long read_ahead_kb; struct hugepages_settings hugepages[NR_ORDERS]; + struct shmem_hugepages_settings shmem_hugepages[NR_ORDERS]; }; int read_file(const char *path, char *buf, size_t buflen); @@ -76,5 +82,6 @@ void thp_save_settings(void); void thp_set_read_ahead_path(char *path); unsigned long thp_supported_orders(void); +unsigned long thp_shmem_supported_orders(void); #endif /* __THP_SETTINGS_H__ */ diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c index 5a62530da3b5..d8d0cf04bb57 100644 --- a/tools/testing/selftests/mm/vm_util.c +++ b/tools/testing/selftests/mm/vm_util.c @@ -12,6 +12,7 @@ #define PMD_SIZE_FILE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size" #define SMAP_FILE_PATH "/proc/self/smaps" +#define STATUS_FILE_PATH "/proc/self/status" #define MAX_LINE_LENGTH 500 unsigned int __page_size; @@ -171,6 +172,27 @@ uint64_t read_pmd_pagesize(void) return strtoul(buf, NULL, 10); } +unsigned long rss_anon(void) +{ + unsigned long rss_anon = 0; + FILE *fp; + char buffer[MAX_LINE_LENGTH]; + + fp = fopen(STATUS_FILE_PATH, "r"); + if (!fp) + ksft_exit_fail_msg("%s: Failed to open file %s\n", __func__, STATUS_FILE_PATH); + + if (!check_for_pattern(fp, "RssAnon:", buffer, sizeof(buffer))) + goto err_out; + + if (sscanf(buffer, "RssAnon:%10lu kB", &rss_anon) != 1) + ksft_exit_fail_msg("Reading status error\n"); + +err_out: + fclose(fp); + return rss_anon; +} + bool __check_huge(void *addr, char *pattern, int nr_hpages, uint64_t hpage_size) { diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index 9007c420d52c..2eaed8209925 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ b/tools/testing/selftests/mm/vm_util.h @@ -39,6 +39,7 @@ unsigned long pagemap_get_pfn(int fd, char *start); void clear_softdirty(void); bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len); uint64_t read_pmd_pagesize(void); +unsigned long rss_anon(void); bool check_huge_anon(void *addr, int nr_hpages, uint64_t hpage_size); bool check_huge_file(void *addr, int nr_hpages, uint64_t hpage_size); bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size); diff --git a/tools/testing/selftests/mm/write_to_hugetlbfs.c b/tools/testing/selftests/mm/write_to_hugetlbfs.c index 6a2caba19ee1..1289d311efd7 100644 --- a/tools/testing/selftests/mm/write_to_hugetlbfs.c +++ b/tools/testing/selftests/mm/write_to_hugetlbfs.c @@ -28,7 +28,7 @@ enum method { /* Global variables. */ static const char *self; -static char *shmaddr; +static int *shmaddr; static int shmid; /* @@ -47,15 +47,17 @@ void sig_handler(int signo) { printf("Received %d.\n", signo); if (signo == SIGINT) { - printf("Deleting the memory\n"); - if (shmdt((const void *)shmaddr) != 0) { - perror("Detach failure"); + if (shmaddr) { + printf("Deleting the memory\n"); + if (shmdt((const void *)shmaddr) != 0) { + perror("Detach failure"); + shmctl(shmid, IPC_RMID, NULL); + exit(4); + } + shmctl(shmid, IPC_RMID, NULL); - exit(4); + printf("Done deleting the memory\n"); } - - shmctl(shmid, IPC_RMID, NULL); - printf("Done deleting the memory\n"); } exit(2); } @@ -211,7 +213,8 @@ int main(int argc, char **argv) shmctl(shmid, IPC_RMID, NULL); exit(2); } - printf("shmaddr: %p\n", ptr); + shmaddr = ptr; + printf("shmaddr: %p\n", shmaddr); break; default: |