diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-04-02 23:55:34 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-04-02 23:55:34 +0300 |
commit | 6cad420cc695867b4ca710bac21fde21a4102e4b (patch) | |
tree | 890d42abc1e82c2cf5cef583584f88ca70116ce9 /tools | |
parent | 7be97138e7276c71cc9ad1752dcb502d28f4400d (diff) | |
parent | 77d6b9094819ba55353de0ef92957f3f54f2c36c (diff) | |
download | linux-6cad420cc695867b4ca710bac21fde21a4102e4b.tar.xz |
Merge branch 'akpm' (patches from Andrew)
Merge updates from Andrew Morton:
"A large amount of MM, plenty more to come.
Subsystems affected by this patch series:
- tools
- kthread
- kbuild
- scripts
- ocfs2
- vfs
- mm: slub, kmemleak, pagecache, gup, swap, memcg, pagemap, mremap,
sparsemem, kasan, pagealloc, vmscan, compaction, mempolicy,
hugetlbfs, hugetlb"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (155 commits)
include/linux/huge_mm.h: check PageTail in hpage_nr_pages even when !THP
mm/hugetlb: fix build failure with HUGETLB_PAGE but not HUGEBTLBFS
selftests/vm: fix map_hugetlb length used for testing read and write
mm/hugetlb: remove unnecessary memory fetch in PageHeadHuge()
mm/hugetlb.c: clean code by removing unnecessary initialization
hugetlb_cgroup: add hugetlb_cgroup reservation docs
hugetlb_cgroup: add hugetlb_cgroup reservation tests
hugetlb: support file_region coalescing again
hugetlb_cgroup: support noreserve mappings
hugetlb_cgroup: add accounting for shared mappings
hugetlb: disable region_add file_region coalescing
hugetlb_cgroup: add reservation accounting for private mappings
mm/hugetlb_cgroup: fix hugetlb_cgroup migration
hugetlb_cgroup: add interface for charge/uncharge hugetlb reservations
hugetlb_cgroup: add hugetlb_cgroup reservation counter
hugetlbfs: Use i_mmap_rwsem to address page fault/truncate race
hugetlbfs: use i_mmap_rwsem for more pmd sharing synchronization
mm/memblock.c: remove redundant assignment to variable max_addr
mm: mempolicy: require at least one nodeid for MPOL_PREFERRED
mm: mempolicy: use VM_BUG_ON_VMA in queue_pages_test_walk()
...
Diffstat (limited to 'tools')
-rw-r--r-- | tools/accounting/getdelays.c | 2 | ||||
-rw-r--r-- | tools/testing/selftests/vm/.gitignore | 1 | ||||
-rw-r--r-- | tools/testing/selftests/vm/Makefile | 2 | ||||
-rw-r--r-- | tools/testing/selftests/vm/charge_reserved_hugetlb.sh | 575 | ||||
-rw-r--r-- | tools/testing/selftests/vm/gup_benchmark.c | 15 | ||||
-rw-r--r-- | tools/testing/selftests/vm/hugetlb_reparenting_test.sh | 244 | ||||
-rw-r--r-- | tools/testing/selftests/vm/map_hugetlb.c | 14 | ||||
-rw-r--r-- | tools/testing/selftests/vm/mlock2-tests.c | 233 | ||||
-rw-r--r-- | tools/testing/selftests/vm/mremap_dontunmap.c | 313 | ||||
-rwxr-xr-x | tools/testing/selftests/vm/run_vmtests | 37 | ||||
-rw-r--r-- | tools/testing/selftests/vm/write_hugetlb_memory.sh | 23 | ||||
-rw-r--r-- | tools/testing/selftests/vm/write_to_hugetlbfs.c | 242 |
12 files changed, 1496 insertions, 205 deletions
diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c index 8cb504d30384..5ef1c15e88ad 100644 --- a/tools/accounting/getdelays.c +++ b/tools/accounting/getdelays.c @@ -136,7 +136,7 @@ static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid, msg.g.version = 0x1; na = (struct nlattr *) GENLMSG_DATA(&msg); na->nla_type = nla_type; - na->nla_len = nla_len + 1 + NLA_HDRLEN; + na->nla_len = nla_len + NLA_HDRLEN; memcpy(NLA_DATA(na), nla_data, nla_len); msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len); diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index 31b3c98b6d34..d3bed9407773 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -14,3 +14,4 @@ virtual_address_range gup_benchmark va_128TBswitch map_fixed_noreplace +write_to_hugetlbfs diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 7f9a8a8c31da..d31db052dff6 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -14,6 +14,7 @@ TEST_GEN_FILES += map_fixed_noreplace TEST_GEN_FILES += map_populate TEST_GEN_FILES += mlock-random-test TEST_GEN_FILES += mlock2-tests +TEST_GEN_FILES += mremap_dontunmap TEST_GEN_FILES += on-fault-limit TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress @@ -22,6 +23,7 @@ TEST_GEN_FILES += userfaultfd ifneq (,$(filter $(ARCH),arm64 ia64 mips64 parisc64 ppc64 riscv64 s390x sh64 sparc64 x86_64)) TEST_GEN_FILES += va_128TBswitch TEST_GEN_FILES += virtual_address_range +TEST_GEN_FILES += write_to_hugetlbfs endif TEST_PROGS := run_vmtests diff --git a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh new file mode 100644 index 000000000000..18d33684faad --- /dev/null +++ b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh @@ -0,0 +1,575 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +set -e + +if [[ $(id -u) -ne 0 ]]; then + echo "This test must be run as root. Skipping..." + exit 0 +fi + +fault_limit_file=limit_in_bytes +reservation_limit_file=rsvd.limit_in_bytes +fault_usage_file=usage_in_bytes +reservation_usage_file=rsvd.usage_in_bytes + +if [[ "$1" == "-cgroup-v2" ]]; then + cgroup2=1 + fault_limit_file=max + reservation_limit_file=rsvd.max + fault_usage_file=current + reservation_usage_file=rsvd.current +fi + +cgroup_path=/dev/cgroup/memory +if [[ ! -e $cgroup_path ]]; then + mkdir -p $cgroup_path + if [[ $cgroup2 ]]; then + mount -t cgroup2 none $cgroup_path + else + mount -t cgroup memory,hugetlb $cgroup_path + fi +fi + +if [[ $cgroup2 ]]; then + echo "+hugetlb" >/dev/cgroup/memory/cgroup.subtree_control +fi + +function cleanup() { + if [[ $cgroup2 ]]; then + echo $$ >$cgroup_path/cgroup.procs + else + echo $$ >$cgroup_path/tasks + fi + + if [[ -e /mnt/huge ]]; then + rm -rf /mnt/huge/* + umount /mnt/huge || echo error + rmdir /mnt/huge + fi + if [[ -e $cgroup_path/hugetlb_cgroup_test ]]; then + rmdir $cgroup_path/hugetlb_cgroup_test + fi + if [[ -e $cgroup_path/hugetlb_cgroup_test1 ]]; then + rmdir $cgroup_path/hugetlb_cgroup_test1 + fi + if [[ -e $cgroup_path/hugetlb_cgroup_test2 ]]; then + rmdir $cgroup_path/hugetlb_cgroup_test2 + fi + echo 0 >/proc/sys/vm/nr_hugepages + echo CLEANUP DONE +} + +function expect_equal() { + local expected="$1" + local actual="$2" + local error="$3" + + if [[ "$expected" != "$actual" ]]; then + echo "expected ($expected) != actual ($actual): $3" + cleanup + exit 1 + fi +} + +function get_machine_hugepage_size() { + hpz=$(grep -i hugepagesize /proc/meminfo) + kb=${hpz:14:-3} + mb=$(($kb / 1024)) + echo $mb +} + +MB=$(get_machine_hugepage_size) + +function setup_cgroup() { + local name="$1" + local cgroup_limit="$2" + local reservation_limit="$3" + + mkdir $cgroup_path/$name + + echo writing cgroup limit: "$cgroup_limit" + echo "$cgroup_limit" >$cgroup_path/$name/hugetlb.${MB}MB.$fault_limit_file + + echo writing reseravation limit: "$reservation_limit" + echo "$reservation_limit" > \ + $cgroup_path/$name/hugetlb.${MB}MB.$reservation_limit_file + + if [ -e "$cgroup_path/$name/cpuset.cpus" ]; then + echo 0 >$cgroup_path/$name/cpuset.cpus + fi + if [ -e "$cgroup_path/$name/cpuset.mems" ]; then + echo 0 >$cgroup_path/$name/cpuset.mems + fi +} + +function wait_for_hugetlb_memory_to_get_depleted() { + local cgroup="$1" + local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$reservation_usage_file" + # Wait for hugetlbfs memory to get depleted. + while [ $(cat $path) != 0 ]; do + echo Waiting for hugetlb memory to get depleted. + cat $path + sleep 0.5 + done +} + +function wait_for_hugetlb_memory_to_get_reserved() { + local cgroup="$1" + local size="$2" + + local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$reservation_usage_file" + # Wait for hugetlbfs memory to get written. + while [ $(cat $path) != $size ]; do + echo Waiting for hugetlb memory reservation to reach size $size. + cat $path + sleep 0.5 + done +} + +function wait_for_hugetlb_memory_to_get_written() { + local cgroup="$1" + local size="$2" + + local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$fault_usage_file" + # Wait for hugetlbfs memory to get written. + while [ $(cat $path) != $size ]; do + echo Waiting for hugetlb memory to reach size $size. + cat $path + sleep 0.5 + done +} + +function write_hugetlbfs_and_get_usage() { + local cgroup="$1" + local size="$2" + local populate="$3" + local write="$4" + local path="$5" + local method="$6" + local private="$7" + local expect_failure="$8" + local reserve="$9" + + # Function return values. + reservation_failed=0 + oom_killed=0 + hugetlb_difference=0 + reserved_difference=0 + + local hugetlb_usage=$cgroup_path/$cgroup/hugetlb.${MB}MB.$fault_usage_file + local reserved_usage=$cgroup_path/$cgroup/hugetlb.${MB}MB.$reservation_usage_file + + local hugetlb_before=$(cat $hugetlb_usage) + local reserved_before=$(cat $reserved_usage) + + echo + echo Starting: + echo hugetlb_usage="$hugetlb_before" + echo reserved_usage="$reserved_before" + echo expect_failure is "$expect_failure" + + output=$(mktemp) + set +e + if [[ "$method" == "1" ]] || [[ "$method" == 2 ]] || + [[ "$private" == "-r" ]] && [[ "$expect_failure" != 1 ]]; then + + bash write_hugetlb_memory.sh "$size" "$populate" "$write" \ + "$cgroup" "$path" "$method" "$private" "-l" "$reserve" 2>&1 | tee $output & + + local write_result=$? + local write_pid=$! + + until grep -q -i "DONE" $output; do + echo waiting for DONE signal. + if ! ps $write_pid > /dev/null + then + echo "FAIL: The write died" + cleanup + exit 1 + fi + sleep 0.5 + done + + echo ================= write_hugetlb_memory.sh output is: + cat $output + echo ================= end output. + + if [[ "$populate" == "-o" ]] || [[ "$write" == "-w" ]]; then + wait_for_hugetlb_memory_to_get_written "$cgroup" "$size" + elif [[ "$reserve" != "-n" ]]; then + wait_for_hugetlb_memory_to_get_reserved "$cgroup" "$size" + else + # This case doesn't produce visible effects, but we still have + # to wait for the async process to start and execute... + sleep 0.5 + fi + + echo write_result is $write_result + else + bash write_hugetlb_memory.sh "$size" "$populate" "$write" \ + "$cgroup" "$path" "$method" "$private" "$reserve" + local write_result=$? + + if [[ "$reserve" != "-n" ]]; then + wait_for_hugetlb_memory_to_get_reserved "$cgroup" "$size" + fi + fi + set -e + + if [[ "$write_result" == 1 ]]; then + reservation_failed=1 + fi + + # On linus/master, the above process gets SIGBUS'd on oomkill, with + # return code 135. On earlier kernels, it gets actual oomkill, with return + # code 137, so just check for both conditions in case we're testing + # against an earlier kernel. + if [[ "$write_result" == 135 ]] || [[ "$write_result" == 137 ]]; then + oom_killed=1 + fi + + local hugetlb_after=$(cat $hugetlb_usage) + local reserved_after=$(cat $reserved_usage) + + echo After write: + echo hugetlb_usage="$hugetlb_after" + echo reserved_usage="$reserved_after" + + hugetlb_difference=$(($hugetlb_after - $hugetlb_before)) + reserved_difference=$(($reserved_after - $reserved_before)) +} + +function cleanup_hugetlb_memory() { + set +e + local cgroup="$1" + if [[ "$(pgrep -f write_to_hugetlbfs)" != "" ]]; then + echo killing write_to_hugetlbfs + killall -2 write_to_hugetlbfs + wait_for_hugetlb_memory_to_get_depleted $cgroup + fi + set -e + + if [[ -e /mnt/huge ]]; then + rm -rf /mnt/huge/* + umount /mnt/huge + rmdir /mnt/huge + fi +} + +function run_test() { + local size=$(($1 * ${MB} * 1024 * 1024)) + local populate="$2" + local write="$3" + local cgroup_limit=$(($4 * ${MB} * 1024 * 1024)) + local reservation_limit=$(($5 * ${MB} * 1024 * 1024)) + local nr_hugepages="$6" + local method="$7" + local private="$8" + local expect_failure="$9" + local reserve="${10}" + + # Function return values. + hugetlb_difference=0 + reserved_difference=0 + reservation_failed=0 + oom_killed=0 + + echo nr hugepages = "$nr_hugepages" + echo "$nr_hugepages" >/proc/sys/vm/nr_hugepages + + setup_cgroup "hugetlb_cgroup_test" "$cgroup_limit" "$reservation_limit" + + mkdir -p /mnt/huge + mount -t hugetlbfs -o pagesize=${MB}M,size=256M none /mnt/huge + + write_hugetlbfs_and_get_usage "hugetlb_cgroup_test" "$size" "$populate" \ + "$write" "/mnt/huge/test" "$method" "$private" "$expect_failure" \ + "$reserve" + + cleanup_hugetlb_memory "hugetlb_cgroup_test" + + local final_hugetlb=$(cat $cgroup_path/hugetlb_cgroup_test/hugetlb.${MB}MB.$fault_usage_file) + local final_reservation=$(cat $cgroup_path/hugetlb_cgroup_test/hugetlb.${MB}MB.$reservation_usage_file) + + echo $hugetlb_difference + echo $reserved_difference + expect_equal "0" "$final_hugetlb" "final hugetlb is not zero" + expect_equal "0" "$final_reservation" "final reservation is not zero" +} + +function run_multiple_cgroup_test() { + local size1="$1" + local populate1="$2" + local write1="$3" + local cgroup_limit1="$4" + local reservation_limit1="$5" + + local size2="$6" + local populate2="$7" + local write2="$8" + local cgroup_limit2="$9" + local reservation_limit2="${10}" + + local nr_hugepages="${11}" + local method="${12}" + local private="${13}" + local expect_failure="${14}" + local reserve="${15}" + + # Function return values. + hugetlb_difference1=0 + reserved_difference1=0 + reservation_failed1=0 + oom_killed1=0 + + hugetlb_difference2=0 + reserved_difference2=0 + reservation_failed2=0 + oom_killed2=0 + + echo nr hugepages = "$nr_hugepages" + echo "$nr_hugepages" >/proc/sys/vm/nr_hugepages + + setup_cgroup "hugetlb_cgroup_test1" "$cgroup_limit1" "$reservation_limit1" + setup_cgroup "hugetlb_cgroup_test2" "$cgroup_limit2" "$reservation_limit2" + + mkdir -p /mnt/huge + mount -t hugetlbfs -o pagesize=${MB}M,size=256M none /mnt/huge + + write_hugetlbfs_and_get_usage "hugetlb_cgroup_test1" "$size1" \ + "$populate1" "$write1" "/mnt/huge/test1" "$method" "$private" \ + "$expect_failure" "$reserve" + + hugetlb_difference1=$hugetlb_difference + reserved_difference1=$reserved_difference + reservation_failed1=$reservation_failed + oom_killed1=$oom_killed + + local cgroup1_hugetlb_usage=$cgroup_path/hugetlb_cgroup_test1/hugetlb.${MB}MB.$fault_usage_file + local cgroup1_reservation_usage=$cgroup_path/hugetlb_cgroup_test1/hugetlb.${MB}MB.$reservation_usage_file + local cgroup2_hugetlb_usage=$cgroup_path/hugetlb_cgroup_test2/hugetlb.${MB}MB.$fault_usage_file + local cgroup2_reservation_usage=$cgroup_path/hugetlb_cgroup_test2/hugetlb.${MB}MB.$reservation_usage_file + + local usage_before_second_write=$(cat $cgroup1_hugetlb_usage) + local reservation_usage_before_second_write=$(cat $cgroup1_reservation_usage) + + write_hugetlbfs_and_get_usage "hugetlb_cgroup_test2" "$size2" \ + "$populate2" "$write2" "/mnt/huge/test2" "$method" "$private" \ + "$expect_failure" "$reserve" + + hugetlb_difference2=$hugetlb_difference + reserved_difference2=$reserved_difference + reservation_failed2=$reservation_failed + oom_killed2=$oom_killed + + expect_equal "$usage_before_second_write" \ + "$(cat $cgroup1_hugetlb_usage)" "Usage changed." + expect_equal "$reservation_usage_before_second_write" \ + "$(cat $cgroup1_reservation_usage)" "Reservation usage changed." + + cleanup_hugetlb_memory + + local final_hugetlb=$(cat $cgroup1_hugetlb_usage) + local final_reservation=$(cat $cgroup1_reservation_usage) + + expect_equal "0" "$final_hugetlb" \ + "hugetlbt_cgroup_test1 final hugetlb is not zero" + expect_equal "0" "$final_reservation" \ + "hugetlbt_cgroup_test1 final reservation is not zero" + + local final_hugetlb=$(cat $cgroup2_hugetlb_usage) + local final_reservation=$(cat $cgroup2_reservation_usage) + + expect_equal "0" "$final_hugetlb" \ + "hugetlb_cgroup_test2 final hugetlb is not zero" + expect_equal "0" "$final_reservation" \ + "hugetlb_cgroup_test2 final reservation is not zero" +} + +cleanup + +for populate in "" "-o"; do + for method in 0 1 2; do + for private in "" "-r"; do + for reserve in "" "-n"; do + + # Skip mmap(MAP_HUGETLB | MAP_SHARED). Doesn't seem to be supported. + if [[ "$method" == 1 ]] && [[ "$private" == "" ]]; then + continue + fi + + # Skip populated shmem tests. Doesn't seem to be supported. + if [[ "$method" == 2"" ]] && [[ "$populate" == "-o" ]]; then + continue + fi + + if [[ "$method" == 2"" ]] && [[ "$reserve" == "-n" ]]; then + continue + fi + + cleanup + echo + echo + echo + echo Test normal case. + echo private=$private, populate=$populate, method=$method, reserve=$reserve + run_test 5 "$populate" "" 10 10 10 "$method" "$private" "0" "$reserve" + + echo Memory charged to hugtlb=$hugetlb_difference + echo Memory charged to reservation=$reserved_difference + + if [[ "$populate" == "-o" ]]; then + expect_equal "$((5 * $MB * 1024 * 1024))" "$hugetlb_difference" \ + "Reserved memory charged to hugetlb cgroup." + else + expect_equal "0" "$hugetlb_difference" \ + "Reserved memory charged to hugetlb cgroup." + fi + + if [[ "$reserve" != "-n" ]] || [[ "$populate" == "-o" ]]; then + expect_equal "$((5 * $MB * 1024 * 1024))" "$reserved_difference" \ + "Reserved memory not charged to reservation usage." + else + expect_equal "0" "$reserved_difference" \ + "Reserved memory not charged to reservation usage." + fi + + echo 'PASS' + + cleanup + echo + echo + echo + echo Test normal case with write. + echo private=$private, populate=$populate, method=$method, reserve=$reserve + run_test 5 "$populate" '-w' 5 5 10 "$method" "$private" "0" "$reserve" + + echo Memory charged to hugtlb=$hugetlb_difference + echo Memory charged to reservation=$reserved_difference + + expect_equal "$((5 * $MB * 1024 * 1024))" "$hugetlb_difference" \ + "Reserved memory charged to hugetlb cgroup." + + expect_equal "$((5 * $MB * 1024 * 1024))" "$reserved_difference" \ + "Reserved memory not charged to reservation usage." + + echo 'PASS' + + cleanup + continue + echo + echo + echo + echo Test more than reservation case. + echo private=$private, populate=$populate, method=$method, reserve=$reserve + + if [ "$reserve" != "-n" ]; then + run_test "5" "$populate" '' "10" "2" "10" "$method" "$private" "1" \ + "$reserve" + + expect_equal "1" "$reservation_failed" "Reservation succeeded." + fi + + echo 'PASS' + + cleanup + + echo + echo + echo + echo Test more than cgroup limit case. + echo private=$private, populate=$populate, method=$method, reserve=$reserve + + # Not sure if shm memory can be cleaned up when the process gets sigbus'd. + if [[ "$method" != 2 ]]; then + run_test 5 "$populate" "-w" 2 10 10 "$method" "$private" "1" "$reserve" + + expect_equal "1" "$oom_killed" "Not oom killed." + fi + echo 'PASS' + + cleanup + + echo + echo + echo + echo Test normal case, multiple cgroups. + echo private=$private, populate=$populate, method=$method, reserve=$reserve + run_multiple_cgroup_test "3" "$populate" "" "10" "10" "5" \ + "$populate" "" "10" "10" "10" \ + "$method" "$private" "0" "$reserve" + + echo Memory charged to hugtlb1=$hugetlb_difference1 + echo Memory charged to reservation1=$reserved_difference1 + echo Memory charged to hugtlb2=$hugetlb_difference2 + echo Memory charged to reservation2=$reserved_difference2 + + if [[ "$reserve" != "-n" ]] || [[ "$populate" == "-o" ]]; then + expect_equal "3" "$reserved_difference1" \ + "Incorrect reservations charged to cgroup 1." + + expect_equal "5" "$reserved_difference2" \ + "Incorrect reservation charged to cgroup 2." + + else + expect_equal "0" "$reserved_difference1" \ + "Incorrect reservations charged to cgroup 1." + + expect_equal "0" "$reserved_difference2" \ + "Incorrect reservation charged to cgroup 2." + fi + + if [[ "$populate" == "-o" ]]; then + expect_equal "3" "$hugetlb_difference1" \ + "Incorrect hugetlb charged to cgroup 1." + + expect_equal "5" "$hugetlb_difference2" \ + "Incorrect hugetlb charged to cgroup 2." + + else + expect_equal "0" "$hugetlb_difference1" \ + "Incorrect hugetlb charged to cgroup 1." + + expect_equal "0" "$hugetlb_difference2" \ + "Incorrect hugetlb charged to cgroup 2." + fi + echo 'PASS' + + cleanup + echo + echo + echo + echo Test normal case with write, multiple cgroups. + echo private=$private, populate=$populate, method=$method, reserve=$reserve + run_multiple_cgroup_test "3" "$populate" "-w" "10" "10" "5" \ + "$populate" "-w" "10" "10" "10" \ + "$method" "$private" "0" "$reserve" + + echo Memory charged to hugtlb1=$hugetlb_difference1 + echo Memory charged to reservation1=$reserved_difference1 + echo Memory charged to hugtlb2=$hugetlb_difference2 + echo Memory charged to reservation2=$reserved_difference2 + + expect_equal "3" "$hugetlb_difference1" \ + "Incorrect hugetlb charged to cgroup 1." + + expect_equal "3" "$reserved_difference1" \ + "Incorrect reservation charged to cgroup 1." + + expect_equal "5" "$hugetlb_difference2" \ + "Incorrect hugetlb charged to cgroup 2." + + expect_equal "5" "$reserved_difference2" \ + "Incorrected reservation charged to cgroup 2." + echo 'PASS' + + cleanup + + done # reserve + done # private + done # populate +done # method + +umount $cgroup_path +rmdir $cgroup_path diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c index 389327e9b30a..43b4dfe161a2 100644 --- a/tools/testing/selftests/vm/gup_benchmark.c +++ b/tools/testing/selftests/vm/gup_benchmark.c @@ -18,6 +18,10 @@ #define GUP_LONGTERM_BENCHMARK _IOWR('g', 2, struct gup_benchmark) #define GUP_BENCHMARK _IOWR('g', 3, struct gup_benchmark) +/* Similar to above, but use FOLL_PIN instead of FOLL_GET. */ +#define PIN_FAST_BENCHMARK _IOWR('g', 4, struct gup_benchmark) +#define PIN_BENCHMARK _IOWR('g', 5, struct gup_benchmark) + /* Just the flags we need, copied from mm.h: */ #define FOLL_WRITE 0x01 /* check pte is writable */ @@ -40,8 +44,14 @@ int main(int argc, char **argv) char *file = "/dev/zero"; char *p; - while ((opt = getopt(argc, argv, "m:r:n:f:tTLUwSH")) != -1) { + while ((opt = getopt(argc, argv, "m:r:n:f:abtTLUuwSH")) != -1) { switch (opt) { + case 'a': + cmd = PIN_FAST_BENCHMARK; + break; + case 'b': + cmd = PIN_BENCHMARK; + break; case 'm': size = atoi(optarg) * MB; break; @@ -63,6 +73,9 @@ int main(int argc, char **argv) case 'U': cmd = GUP_BENCHMARK; break; + case 'u': + cmd = GUP_FAST_BENCHMARK; + break; case 'w': write = 1; break; diff --git a/tools/testing/selftests/vm/hugetlb_reparenting_test.sh b/tools/testing/selftests/vm/hugetlb_reparenting_test.sh new file mode 100644 index 000000000000..d11d1febccc3 --- /dev/null +++ b/tools/testing/selftests/vm/hugetlb_reparenting_test.sh @@ -0,0 +1,244 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +set -e + +if [[ $(id -u) -ne 0 ]]; then + echo "This test must be run as root. Skipping..." + exit 0 +fi + +usage_file=usage_in_bytes + +if [[ "$1" == "-cgroup-v2" ]]; then + cgroup2=1 + usage_file=current +fi + +CGROUP_ROOT='/dev/cgroup/memory' +MNT='/mnt/huge/' + +if [[ ! -e $CGROUP_ROOT ]]; then + mkdir -p $CGROUP_ROOT + if [[ $cgroup2 ]]; then + mount -t cgroup2 none $CGROUP_ROOT + sleep 1 + echo "+hugetlb +memory" >$CGROUP_ROOT/cgroup.subtree_control + else + mount -t cgroup memory,hugetlb $CGROUP_ROOT + fi +fi + +function get_machine_hugepage_size() { + hpz=$(grep -i hugepagesize /proc/meminfo) + kb=${hpz:14:-3} + mb=$(($kb / 1024)) + echo $mb +} + +MB=$(get_machine_hugepage_size) + +function cleanup() { + echo cleanup + set +e + rm -rf "$MNT"/* 2>/dev/null + umount "$MNT" 2>/dev/null + rmdir "$MNT" 2>/dev/null + rmdir "$CGROUP_ROOT"/a/b 2>/dev/null + rmdir "$CGROUP_ROOT"/a 2>/dev/null + rmdir "$CGROUP_ROOT"/test1 2>/dev/null + echo 0 >/proc/sys/vm/nr_hugepages + set -e +} + +function assert_state() { + local expected_a="$1" + local expected_a_hugetlb="$2" + local expected_b="" + local expected_b_hugetlb="" + + if [ ! -z ${3:-} ] && [ ! -z ${4:-} ]; then + expected_b="$3" + expected_b_hugetlb="$4" + fi + local tolerance=$((5 * 1024 * 1024)) + + local actual_a + actual_a="$(cat "$CGROUP_ROOT"/a/memory.$usage_file)" + if [[ $actual_a -lt $(($expected_a - $tolerance)) ]] || + [[ $actual_a -gt $(($expected_a + $tolerance)) ]]; then + echo actual a = $((${actual_a%% *} / 1024 / 1024)) MB + echo expected a = $((${expected_a%% *} / 1024 / 1024)) MB + echo fail + + cleanup + exit 1 + fi + + local actual_a_hugetlb + actual_a_hugetlb="$(cat "$CGROUP_ROOT"/a/hugetlb.${MB}MB.$usage_file)" + if [[ $actual_a_hugetlb -lt $(($expected_a_hugetlb - $tolerance)) ]] || + [[ $actual_a_hugetlb -gt $(($expected_a_hugetlb + $tolerance)) ]]; then + echo actual a hugetlb = $((${actual_a_hugetlb%% *} / 1024 / 1024)) MB + echo expected a hugetlb = $((${expected_a_hugetlb%% *} / 1024 / 1024)) MB + echo fail + + cleanup + exit 1 + fi + + if [[ -z "$expected_b" || -z "$expected_b_hugetlb" ]]; then + return + fi + + local actual_b + actual_b="$(cat "$CGROUP_ROOT"/a/b/memory.$usage_file)" + if [[ $actual_b -lt $(($expected_b - $tolerance)) ]] || + [[ $actual_b -gt $(($expected_b + $tolerance)) ]]; then + echo actual b = $((${actual_b%% *} / 1024 / 1024)) MB + echo expected b = $((${expected_b%% *} / 1024 / 1024)) MB + echo fail + + cleanup + exit 1 + fi + + local actual_b_hugetlb + actual_b_hugetlb="$(cat "$CGROUP_ROOT"/a/b/hugetlb.${MB}MB.$usage_file)" + if [[ $actual_b_hugetlb -lt $(($expected_b_hugetlb - $tolerance)) ]] || + [[ $actual_b_hugetlb -gt $(($expected_b_hugetlb + $tolerance)) ]]; then + echo actual b hugetlb = $((${actual_b_hugetlb%% *} / 1024 / 1024)) MB + echo expected b hugetlb = $((${expected_b_hugetlb%% *} / 1024 / 1024)) MB + echo fail + + cleanup + exit 1 + fi +} + +function setup() { + echo 100 >/proc/sys/vm/nr_hugepages + mkdir "$CGROUP_ROOT"/a + sleep 1 + if [[ $cgroup2 ]]; then + echo "+hugetlb +memory" >$CGROUP_ROOT/a/cgroup.subtree_control + else + echo 0 >$CGROUP_ROOT/a/cpuset.mems + echo 0 >$CGROUP_ROOT/a/cpuset.cpus + fi + + mkdir "$CGROUP_ROOT"/a/b + + if [[ ! $cgroup2 ]]; then + echo 0 >$CGROUP_ROOT/a/b/cpuset.mems + echo 0 >$CGROUP_ROOT/a/b/cpuset.cpus + fi + + mkdir -p "$MNT" + mount -t hugetlbfs none "$MNT" +} + +write_hugetlbfs() { + local cgroup="$1" + local path="$2" + local size="$3" + + if [[ $cgroup2 ]]; then + echo $$ >$CGROUP_ROOT/$cgroup/cgroup.procs + else + echo 0 >$CGROUP_ROOT/$cgroup/cpuset.mems + echo 0 >$CGROUP_ROOT/$cgroup/cpuset.cpus + echo $$ >"$CGROUP_ROOT/$cgroup/tasks" + fi + ./write_to_hugetlbfs -p "$path" -s "$size" -m 0 -o + if [[ $cgroup2 ]]; then + echo $$ >$CGROUP_ROOT/cgroup.procs + else + echo $$ >"$CGROUP_ROOT/tasks" + fi + echo +} + +set -e + +size=$((${MB} * 1024 * 1024 * 25)) # 50MB = 25 * 2MB hugepages. + +cleanup + +echo +echo +echo Test charge, rmdir, uncharge +setup +echo mkdir +mkdir $CGROUP_ROOT/test1 + +echo write +write_hugetlbfs test1 "$MNT"/test $size + +echo rmdir +rmdir $CGROUP_ROOT/test1 +mkdir $CGROUP_ROOT/test1 + +echo uncharge +rm -rf /mnt/huge/* + +cleanup + +echo done +echo +echo +if [[ ! $cgroup2 ]]; then + echo "Test parent and child hugetlb usage" + setup + + echo write + write_hugetlbfs a "$MNT"/test $size + + echo Assert memory charged correctly for parent use. + assert_state 0 $size 0 0 + + write_hugetlbfs a/b "$MNT"/test2 $size + + echo Assert memory charged correctly for child use. + assert_state 0 $(($size * 2)) 0 $size + + rmdir "$CGROUP_ROOT"/a/b + sleep 5 + echo Assert memory reparent correctly. + assert_state 0 $(($size * 2)) + + rm -rf "$MNT"/* + umount "$MNT" + echo Assert memory uncharged correctly. + assert_state 0 0 + + cleanup +fi + +echo +echo +echo "Test child only hugetlb usage" +echo setup +setup + +echo write +write_hugetlbfs a/b "$MNT"/test2 $size + +echo Assert memory charged correctly for child only use. +assert_state 0 $(($size)) 0 $size + +rmdir "$CGROUP_ROOT"/a/b +echo Assert memory reparent correctly. +assert_state 0 $size + +rm -rf "$MNT"/* +umount "$MNT" +echo Assert memory uncharged correctly. +assert_state 0 0 + +cleanup + +echo ALL PASS + +umount $CGROUP_ROOT +rm -rf $CGROUP_ROOT diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c index 5a2d7b8efc40..6af951900aa3 100644 --- a/tools/testing/selftests/vm/map_hugetlb.c +++ b/tools/testing/selftests/vm/map_hugetlb.c @@ -45,20 +45,20 @@ static void check_bytes(char *addr) printf("First hex is %x\n", *((unsigned int *)addr)); } -static void write_bytes(char *addr) +static void write_bytes(char *addr, size_t length) { unsigned long i; - for (i = 0; i < LENGTH; i++) + for (i = 0; i < length; i++) *(addr + i) = (char)i; } -static int read_bytes(char *addr) +static int read_bytes(char *addr, size_t length) { unsigned long i; check_bytes(addr); - for (i = 0; i < LENGTH; i++) + for (i = 0; i < length; i++) if (*(addr + i) != (char)i) { printf("Mismatch at %lu\n", i); return 1; @@ -96,11 +96,11 @@ int main(int argc, char **argv) printf("Returned address is %p\n", addr); check_bytes(addr); - write_bytes(addr); - ret = read_bytes(addr); + write_bytes(addr, length); + ret = read_bytes(addr, length); /* munmap() length of MAP_HUGETLB memory must be hugepage aligned */ - if (munmap(addr, LENGTH)) { + if (munmap(addr, length)) { perror("munmap"); exit(1); } diff --git a/tools/testing/selftests/vm/mlock2-tests.c b/tools/testing/selftests/vm/mlock2-tests.c index 637b6d0ac0d0..11b2301f3aa3 100644 --- a/tools/testing/selftests/vm/mlock2-tests.c +++ b/tools/testing/selftests/vm/mlock2-tests.c @@ -67,59 +67,6 @@ out: return ret; } -static uint64_t get_pageflags(unsigned long addr) -{ - FILE *file; - uint64_t pfn; - unsigned long offset; - - file = fopen("/proc/self/pagemap", "r"); - if (!file) { - perror("fopen pagemap"); - _exit(1); - } - - offset = addr / getpagesize() * sizeof(pfn); - - if (fseek(file, offset, SEEK_SET)) { - perror("fseek pagemap"); - _exit(1); - } - - if (fread(&pfn, sizeof(pfn), 1, file) != 1) { - perror("fread pagemap"); - _exit(1); - } - - fclose(file); - return pfn; -} - -static uint64_t get_kpageflags(unsigned long pfn) -{ - uint64_t flags; - FILE *file; - - file = fopen("/proc/kpageflags", "r"); - if (!file) { - perror("fopen kpageflags"); - _exit(1); - } - - if (fseek(file, pfn * sizeof(flags), SEEK_SET)) { - perror("fseek kpageflags"); - _exit(1); - } - - if (fread(&flags, sizeof(flags), 1, file) != 1) { - perror("fread kpageflags"); - _exit(1); - } - - fclose(file); - return flags; -} - #define VMFLAGS "VmFlags:" static bool is_vmflag_set(unsigned long addr, const char *vmflag) @@ -159,19 +106,13 @@ out: #define RSS "Rss:" #define LOCKED "lo" -static bool is_vma_lock_on_fault(unsigned long addr) +static unsigned long get_value_for_name(unsigned long addr, const char *name) { - bool ret = false; - bool locked; - FILE *smaps = NULL; - unsigned long vma_size, vma_rss; char *line = NULL; - char *value; size_t size = 0; - - locked = is_vmflag_set(addr, LOCKED); - if (!locked) - goto out; + char *value_ptr; + FILE *smaps = NULL; + unsigned long value = -1UL; smaps = seek_to_smaps_entry(addr); if (!smaps) { @@ -180,112 +121,70 @@ static bool is_vma_lock_on_fault(unsigned long addr) } while (getline(&line, &size, smaps) > 0) { - if (!strstr(line, SIZE)) { + if (!strstr(line, name)) { free(line); line = NULL; size = 0; continue; } - value = line + strlen(SIZE); - if (sscanf(value, "%lu kB", &vma_size) < 1) { + value_ptr = line + strlen(name); + if (sscanf(value_ptr, "%lu kB", &value) < 1) { printf("Unable to parse smaps entry for Size\n"); goto out; } break; } - while (getline(&line, &size, smaps) > 0) { - if (!strstr(line, RSS)) { - free(line); - line = NULL; - size = 0; - continue; - } - - value = line + strlen(RSS); - if (sscanf(value, "%lu kB", &vma_rss) < 1) { - printf("Unable to parse smaps entry for Rss\n"); - goto out; - } - break; - } - - ret = locked && (vma_rss < vma_size); out: - free(line); if (smaps) fclose(smaps); - return ret; + free(line); + return value; } -#define PRESENT_BIT 0x8000000000000000ULL -#define PFN_MASK 0x007FFFFFFFFFFFFFULL -#define UNEVICTABLE_BIT (1UL << 18) - -static int lock_check(char *map) +static bool is_vma_lock_on_fault(unsigned long addr) { - unsigned long page_size = getpagesize(); - uint64_t page1_flags, page2_flags; + bool locked; + unsigned long vma_size, vma_rss; - page1_flags = get_pageflags((unsigned long)map); - page2_flags = get_pageflags((unsigned long)map + page_size); + locked = is_vmflag_set(addr, LOCKED); + if (!locked) + return false; - /* Both pages should be present */ - if (((page1_flags & PRESENT_BIT) == 0) || - ((page2_flags & PRESENT_BIT) == 0)) { - printf("Failed to make both pages present\n"); - return 1; - } + vma_size = get_value_for_name(addr, SIZE); + vma_rss = get_value_for_name(addr, RSS); - page1_flags = get_kpageflags(page1_flags & PFN_MASK); - page2_flags = get_kpageflags(page2_flags & PFN_MASK); + /* only one page is faulted in */ + return (vma_rss < vma_size); +} - /* Both pages should be unevictable */ - if (((page1_flags & UNEVICTABLE_BIT) == 0) || - ((page2_flags & UNEVICTABLE_BIT) == 0)) { - printf("Failed to make both pages unevictable\n"); - return 1; - } +#define PRESENT_BIT 0x8000000000000000ULL +#define PFN_MASK 0x007FFFFFFFFFFFFFULL +#define UNEVICTABLE_BIT (1UL << 18) - if (!is_vmflag_set((unsigned long)map, LOCKED)) { - printf("VMA flag %s is missing on page 1\n", LOCKED); - return 1; - } +static int lock_check(unsigned long addr) +{ + bool locked; + unsigned long vma_size, vma_rss; - if (!is_vmflag_set((unsigned long)map + page_size, LOCKED)) { - printf("VMA flag %s is missing on page 2\n", LOCKED); - return 1; - } + locked = is_vmflag_set(addr, LOCKED); + if (!locked) + return false; - return 0; + vma_size = get_value_for_name(addr, SIZE); + vma_rss = get_value_for_name(addr, RSS); + + return (vma_rss == vma_size); } static int unlock_lock_check(char *map) { - unsigned long page_size = getpagesize(); - uint64_t page1_flags, page2_flags; - - page1_flags = get_pageflags((unsigned long)map); - page2_flags = get_pageflags((unsigned long)map + page_size); - page1_flags = get_kpageflags(page1_flags & PFN_MASK); - page2_flags = get_kpageflags(page2_flags & PFN_MASK); - - if ((page1_flags & UNEVICTABLE_BIT) || (page2_flags & UNEVICTABLE_BIT)) { - printf("A page is still marked unevictable after unlock\n"); - return 1; - } - if (is_vmflag_set((unsigned long)map, LOCKED)) { printf("VMA flag %s is present on page 1 after unlock\n", LOCKED); return 1; } - if (is_vmflag_set((unsigned long)map + page_size, LOCKED)) { - printf("VMA flag %s is present on page 2 after unlock\n", LOCKED); - return 1; - } - return 0; } @@ -311,7 +210,7 @@ static int test_mlock_lock() goto unmap; } - if (lock_check(map)) + if (!lock_check((unsigned long)map)) goto unmap; /* Now unlock and recheck attributes */ @@ -330,64 +229,18 @@ out: static int onfault_check(char *map) { - unsigned long page_size = getpagesize(); - uint64_t page1_flags, page2_flags; - - page1_flags = get_pageflags((unsigned long)map); - page2_flags = get_pageflags((unsigned long)map + page_size); - - /* Neither page should be present */ - if ((page1_flags & PRESENT_BIT) || (page2_flags & PRESENT_BIT)) { - printf("Pages were made present by MLOCK_ONFAULT\n"); - return 1; - } - *map = 'a'; - page1_flags = get_pageflags((unsigned long)map); - page2_flags = get_pageflags((unsigned long)map + page_size); - - /* Only page 1 should be present */ - if ((page1_flags & PRESENT_BIT) == 0) { - printf("Page 1 is not present after fault\n"); - return 1; - } else if (page2_flags & PRESENT_BIT) { - printf("Page 2 was made present\n"); - return 1; - } - - page1_flags = get_kpageflags(page1_flags & PFN_MASK); - - /* Page 1 should be unevictable */ - if ((page1_flags & UNEVICTABLE_BIT) == 0) { - printf("Failed to make faulted page unevictable\n"); - return 1; - } - if (!is_vma_lock_on_fault((unsigned long)map)) { printf("VMA is not marked for lock on fault\n"); return 1; } - if (!is_vma_lock_on_fault((unsigned long)map + page_size)) { - printf("VMA is not marked for lock on fault\n"); - return 1; - } - return 0; } static int unlock_onfault_check(char *map) { unsigned long page_size = getpagesize(); - uint64_t page1_flags; - - page1_flags = get_pageflags((unsigned long)map); - page1_flags = get_kpageflags(page1_flags & PFN_MASK); - - if (page1_flags & UNEVICTABLE_BIT) { - printf("Page 1 is still marked unevictable after unlock\n"); - return 1; - } if (is_vma_lock_on_fault((unsigned long)map) || is_vma_lock_on_fault((unsigned long)map + page_size)) { @@ -445,7 +298,6 @@ static int test_lock_onfault_of_present() char *map; int ret = 1; unsigned long page_size = getpagesize(); - uint64_t page1_flags, page2_flags; map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); @@ -465,17 +317,6 @@ static int test_lock_onfault_of_present() goto unmap; } - page1_flags = get_pageflags((unsigned long)map); - page2_flags = get_pageflags((unsigned long)map + page_size); - page1_flags = get_kpageflags(page1_flags & PFN_MASK); - page2_flags = get_kpageflags(page2_flags & PFN_MASK); - - /* Page 1 should be unevictable */ - if ((page1_flags & UNEVICTABLE_BIT) == 0) { - printf("Failed to make present page unevictable\n"); - goto unmap; - } - if (!is_vma_lock_on_fault((unsigned long)map) || !is_vma_lock_on_fault((unsigned long)map + page_size)) { printf("VMA with present pages is not marked lock on fault\n"); @@ -507,7 +348,7 @@ static int test_munlockall() goto out; } - if (lock_check(map)) + if (!lock_check((unsigned long)map)) goto unmap; if (munlockall()) { @@ -549,7 +390,7 @@ static int test_munlockall() goto out; } - if (lock_check(map)) + if (!lock_check((unsigned long)map)) goto unmap; if (munlockall()) { diff --git a/tools/testing/selftests/vm/mremap_dontunmap.c b/tools/testing/selftests/vm/mremap_dontunmap.c new file mode 100644 index 000000000000..ee06cb0b9efb --- /dev/null +++ b/tools/testing/selftests/vm/mremap_dontunmap.c @@ -0,0 +1,313 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Tests for mremap w/ MREMAP_DONTUNMAP. + * + * Copyright 2020, Brian Geffon <bgeffon@google.com> + */ +#define _GNU_SOURCE +#include <sys/mman.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> + +#include "../kselftest.h" + +#ifndef MREMAP_DONTUNMAP +#define MREMAP_DONTUNMAP 4 +#endif + +unsigned long page_size; +char *page_buffer; + +static void dump_maps(void) +{ + char cmd[32]; + + snprintf(cmd, sizeof(cmd), "cat /proc/%d/maps", getpid()); + system(cmd); +} + +#define BUG_ON(condition, description) \ + do { \ + if (condition) { \ + fprintf(stderr, "[FAIL]\t%s():%d\t%s:%s\n", __func__, \ + __LINE__, (description), strerror(errno)); \ + dump_maps(); \ + exit(1); \ + } \ + } while (0) + +// Try a simple operation for to "test" for kernel support this prevents +// reporting tests as failed when it's run on an older kernel. +static int kernel_support_for_mremap_dontunmap() +{ + int ret = 0; + unsigned long num_pages = 1; + void *source_mapping = mmap(NULL, num_pages * page_size, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + BUG_ON(source_mapping == MAP_FAILED, "mmap"); + + // This simple remap should only fail if MREMAP_DONTUNMAP isn't + // supported. + void *dest_mapping = + mremap(source_mapping, num_pages * page_size, num_pages * page_size, + MREMAP_DONTUNMAP | MREMAP_MAYMOVE, 0); + if (dest_mapping == MAP_FAILED) { + ret = errno; + } else { + BUG_ON(munmap(dest_mapping, num_pages * page_size) == -1, + "unable to unmap destination mapping"); + } + + BUG_ON(munmap(source_mapping, num_pages * page_size) == -1, + "unable to unmap source mapping"); + return ret; +} + +// This helper will just validate that an entire mapping contains the expected +// byte. +static int check_region_contains_byte(void *addr, unsigned long size, char byte) +{ + BUG_ON(size & (page_size - 1), + "check_region_contains_byte expects page multiples"); + BUG_ON((unsigned long)addr & (page_size - 1), + "check_region_contains_byte expects page alignment"); + + memset(page_buffer, byte, page_size); + + unsigned long num_pages = size / page_size; + unsigned long i; + + // Compare each page checking that it contains our expected byte. + for (i = 0; i < num_pages; ++i) { + int ret = + memcmp(addr + (i * page_size), page_buffer, page_size); + if (ret) { + return ret; + } + } + + return 0; +} + +// this test validates that MREMAP_DONTUNMAP moves the pagetables while leaving +// the source mapping mapped. +static void mremap_dontunmap_simple() +{ + unsigned long num_pages = 5; + + void *source_mapping = + mmap(NULL, num_pages * page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + BUG_ON(source_mapping == MAP_FAILED, "mmap"); + + memset(source_mapping, 'a', num_pages * page_size); + + // Try to just move the whole mapping anywhere (not fixed). + void *dest_mapping = + mremap(source_mapping, num_pages * page_size, num_pages * page_size, + MREMAP_DONTUNMAP | MREMAP_MAYMOVE, NULL); + BUG_ON(dest_mapping == MAP_FAILED, "mremap"); + + // Validate that the pages have been moved, we know they were moved if + // the dest_mapping contains a's. + BUG_ON(check_region_contains_byte + (dest_mapping, num_pages * page_size, 'a') != 0, + "pages did not migrate"); + BUG_ON(check_region_contains_byte + (source_mapping, num_pages * page_size, 0) != 0, + "source should have no ptes"); + + BUG_ON(munmap(dest_mapping, num_pages * page_size) == -1, + "unable to unmap destination mapping"); + BUG_ON(munmap(source_mapping, num_pages * page_size) == -1, + "unable to unmap source mapping"); +} + +// This test validates MREMAP_DONTUNMAP will move page tables to a specific +// destination using MREMAP_FIXED, also while validating that the source +// remains intact. +static void mremap_dontunmap_simple_fixed() +{ + unsigned long num_pages = 5; + + // Since we want to guarantee that we can remap to a point, we will + // create a mapping up front. + void *dest_mapping = + mmap(NULL, num_pages * page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + BUG_ON(dest_mapping == MAP_FAILED, "mmap"); + memset(dest_mapping, 'X', num_pages * page_size); + + void *source_mapping = + mmap(NULL, num_pages * page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + BUG_ON(source_mapping == MAP_FAILED, "mmap"); + memset(source_mapping, 'a', num_pages * page_size); + + void *remapped_mapping = + mremap(source_mapping, num_pages * page_size, num_pages * page_size, + MREMAP_FIXED | MREMAP_DONTUNMAP | MREMAP_MAYMOVE, + dest_mapping); + BUG_ON(remapped_mapping == MAP_FAILED, "mremap"); + BUG_ON(remapped_mapping != dest_mapping, + "mremap should have placed the remapped mapping at dest_mapping"); + + // The dest mapping will have been unmap by mremap so we expect the Xs + // to be gone and replaced with a's. + BUG_ON(check_region_contains_byte + (dest_mapping, num_pages * page_size, 'a') != 0, + "pages did not migrate"); + + // And the source mapping will have had its ptes dropped. + BUG_ON(check_region_contains_byte + (source_mapping, num_pages * page_size, 0) != 0, + "source should have no ptes"); + + BUG_ON(munmap(dest_mapping, num_pages * page_size) == -1, + "unable to unmap destination mapping"); + BUG_ON(munmap(source_mapping, num_pages * page_size) == -1, + "unable to unmap source mapping"); +} + +// This test validates that we can MREMAP_DONTUNMAP for a portion of an +// existing mapping. +static void mremap_dontunmap_partial_mapping() +{ + /* + * source mapping: + * -------------- + * | aaaaaaaaaa | + * -------------- + * to become: + * -------------- + * | aaaaa00000 | + * -------------- + * With the destination mapping containing 5 pages of As. + * --------- + * | aaaaa | + * --------- + */ + unsigned long num_pages = 10; + void *source_mapping = + mmap(NULL, num_pages * page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + BUG_ON(source_mapping == MAP_FAILED, "mmap"); + memset(source_mapping, 'a', num_pages * page_size); + + // We will grab the last 5 pages of the source and move them. + void *dest_mapping = + mremap(source_mapping + (5 * page_size), 5 * page_size, + 5 * page_size, + MREMAP_DONTUNMAP | MREMAP_MAYMOVE, NULL); + BUG_ON(dest_mapping == MAP_FAILED, "mremap"); + + // We expect the first 5 pages of the source to contain a's and the + // final 5 pages to contain zeros. + BUG_ON(check_region_contains_byte(source_mapping, 5 * page_size, 'a') != + 0, "first 5 pages of source should have original pages"); + BUG_ON(check_region_contains_byte + (source_mapping + (5 * page_size), 5 * page_size, 0) != 0, + "final 5 pages of source should have no ptes"); + + // Finally we expect the destination to have 5 pages worth of a's. + BUG_ON(check_region_contains_byte(dest_mapping, 5 * page_size, 'a') != + 0, "dest mapping should contain ptes from the source"); + + BUG_ON(munmap(dest_mapping, 5 * page_size) == -1, + "unable to unmap destination mapping"); + BUG_ON(munmap(source_mapping, num_pages * page_size) == -1, + "unable to unmap source mapping"); +} + +// This test validates that we can remap over only a portion of a mapping. +static void mremap_dontunmap_partial_mapping_overwrite(void) +{ + /* + * source mapping: + * --------- + * |aaaaa| + * --------- + * dest mapping initially: + * ----------- + * |XXXXXXXXXX| + * ------------ + * Source to become: + * --------- + * |00000| + * --------- + * With the destination mapping containing 5 pages of As. + * ------------ + * |aaaaaXXXXX| + * ------------ + */ + void *source_mapping = + mmap(NULL, 5 * page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + BUG_ON(source_mapping == MAP_FAILED, "mmap"); + memset(source_mapping, 'a', 5 * page_size); + + void *dest_mapping = + mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + BUG_ON(dest_mapping == MAP_FAILED, "mmap"); + memset(dest_mapping, 'X', 10 * page_size); + + // We will grab the last 5 pages of the source and move them. + void *remapped_mapping = + mremap(source_mapping, 5 * page_size, + 5 * page_size, + MREMAP_DONTUNMAP | MREMAP_MAYMOVE | MREMAP_FIXED, dest_mapping); + BUG_ON(dest_mapping == MAP_FAILED, "mremap"); + BUG_ON(dest_mapping != remapped_mapping, "expected to remap to dest_mapping"); + + BUG_ON(check_region_contains_byte(source_mapping, 5 * page_size, 0) != + 0, "first 5 pages of source should have no ptes"); + + // Finally we expect the destination to have 5 pages worth of a's. + BUG_ON(check_region_contains_byte(dest_mapping, 5 * page_size, 'a') != 0, + "dest mapping should contain ptes from the source"); + + // Finally the last 5 pages shouldn't have been touched. + BUG_ON(check_region_contains_byte(dest_mapping + (5 * page_size), + 5 * page_size, 'X') != 0, + "dest mapping should have retained the last 5 pages"); + + BUG_ON(munmap(dest_mapping, 10 * page_size) == -1, + "unable to unmap destination mapping"); + BUG_ON(munmap(source_mapping, 5 * page_size) == -1, + "unable to unmap source mapping"); +} + +int main(void) +{ + page_size = sysconf(_SC_PAGE_SIZE); + + // test for kernel support for MREMAP_DONTUNMAP skipping the test if + // not. + if (kernel_support_for_mremap_dontunmap() != 0) { + printf("No kernel support for MREMAP_DONTUNMAP\n"); + return KSFT_SKIP; + } + + // Keep a page sized buffer around for when we need it. + page_buffer = + mmap(NULL, page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + BUG_ON(page_buffer == MAP_FAILED, "unable to mmap a page."); + + mremap_dontunmap_simple(); + mremap_dontunmap_simple_fixed(); + mremap_dontunmap_partial_mapping(); + mremap_dontunmap_partial_mapping_overwrite(); + + BUG_ON(munmap(page_buffer, page_size) == -1, + "unable to unmap page buffer"); + + printf("OK\n"); + return 0; +} diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests index f33714843198..665009ebfba4 100755 --- a/tools/testing/selftests/vm/run_vmtests +++ b/tools/testing/selftests/vm/run_vmtests @@ -123,6 +123,28 @@ else echo "[PASS]" fi +echo "--------------------------------------------" +echo "running 'gup_benchmark -U' (normal/slow gup)" +echo "--------------------------------------------" +./gup_benchmark -U +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + +echo "------------------------------------------" +echo "running gup_benchmark -b (pin_user_pages)" +echo "------------------------------------------" +./gup_benchmark -b +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + echo "-------------------" echo "running userfaultfd" echo "-------------------" @@ -270,4 +292,19 @@ else exitcode=1 fi +echo "------------------------------------" +echo "running MREMAP_DONTUNMAP smoke test" +echo "------------------------------------" +./mremap_dontunmap +ret_val=$? + +if [ $ret_val -eq 0 ]; then + echo "[PASS]" +elif [ $ret_val -eq $ksft_skip ]; then + echo "[SKIP]" + exitcode=$ksft_skip +else + echo "[FAIL]" + exitcode=1 +fi exit $exitcode diff --git a/tools/testing/selftests/vm/write_hugetlb_memory.sh b/tools/testing/selftests/vm/write_hugetlb_memory.sh new file mode 100644 index 000000000000..d3d0d108924d --- /dev/null +++ b/tools/testing/selftests/vm/write_hugetlb_memory.sh @@ -0,0 +1,23 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +set -e + +size=$1 +populate=$2 +write=$3 +cgroup=$4 +path=$5 +method=$6 +private=$7 +want_sleep=$8 +reserve=$9 + +echo "Putting task in cgroup '$cgroup'" +echo $$ > /dev/cgroup/memory/"$cgroup"/cgroup.procs + +echo "Method is $method" + +set +e +./write_to_hugetlbfs -p "$path" -s "$size" "$write" "$populate" -m "$method" \ + "$private" "$want_sleep" "$reserve" diff --git a/tools/testing/selftests/vm/write_to_hugetlbfs.c b/tools/testing/selftests/vm/write_to_hugetlbfs.c new file mode 100644 index 000000000000..110bc4e4015d --- /dev/null +++ b/tools/testing/selftests/vm/write_to_hugetlbfs.c @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This program reserves and uses hugetlb memory, supporting a bunch of + * scenarios needed by the charged_reserved_hugetlb.sh test. + */ + +#include <err.h> +#include <errno.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/types.h> +#include <sys/shm.h> +#include <sys/stat.h> +#include <sys/mman.h> + +/* Global definitions. */ +enum method { + HUGETLBFS, + MMAP_MAP_HUGETLB, + SHM, + MAX_METHOD +}; + + +/* Global variables. */ +static const char *self; +static char *shmaddr; +static int shmid; + +/* + * Show usage and exit. + */ +static void exit_usage(void) +{ + printf("Usage: %s -p <path to hugetlbfs file> -s <size to map> " + "[-m <0=hugetlbfs | 1=mmap(MAP_HUGETLB)>] [-l] [-r] " + "[-o] [-w] [-n]\n", + self); + exit(EXIT_FAILURE); +} + +void sig_handler(int signo) +{ + printf("Received %d.\n", signo); + if (signo == SIGINT) { + printf("Deleting the memory\n"); + if (shmdt((const void *)shmaddr) != 0) { + perror("Detach failure"); + shmctl(shmid, IPC_RMID, NULL); + exit(4); + } + + shmctl(shmid, IPC_RMID, NULL); + printf("Done deleting the memory\n"); + } + exit(2); +} + +int main(int argc, char **argv) +{ + int fd = 0; + int key = 0; + int *ptr = NULL; + int c = 0; + int size = 0; + char path[256] = ""; + enum method method = MAX_METHOD; + int want_sleep = 0, private = 0; + int populate = 0; + int write = 0; + int reserve = 1; + + unsigned long i; + + if (signal(SIGINT, sig_handler) == SIG_ERR) + err(1, "\ncan't catch SIGINT\n"); + + /* Parse command-line arguments. */ + setvbuf(stdout, NULL, _IONBF, 0); + self = argv[0]; + + while ((c = getopt(argc, argv, "s:p:m:owlrn")) != -1) { + switch (c) { + case 's': + size = atoi(optarg); + break; + case 'p': + strncpy(path, optarg, sizeof(path)); + break; + case 'm': + if (atoi(optarg) >= MAX_METHOD) { + errno = EINVAL; + perror("Invalid -m."); + exit_usage(); + } + method = atoi(optarg); + break; + case 'o': + populate = 1; + break; + case 'w': + write = 1; + break; + case 'l': + want_sleep = 1; + break; + case 'r': + private + = 1; + break; + case 'n': + reserve = 0; + break; + default: + errno = EINVAL; + perror("Invalid arg"); + exit_usage(); + } + } + + if (strncmp(path, "", sizeof(path)) != 0) { + printf("Writing to this path: %s\n", path); + } else { + errno = EINVAL; + perror("path not found"); + exit_usage(); + } + + if (size != 0) { + printf("Writing this size: %d\n", size); + } else { + errno = EINVAL; + perror("size not found"); + exit_usage(); + } + + if (!populate) + printf("Not populating.\n"); + else + printf("Populating.\n"); + + if (!write) + printf("Not writing to memory.\n"); + + if (method == MAX_METHOD) { + errno = EINVAL; + perror("-m Invalid"); + exit_usage(); + } else + printf("Using method=%d\n", method); + + if (!private) + printf("Shared mapping.\n"); + else + printf("Private mapping.\n"); + + if (!reserve) + printf("NO_RESERVE mapping.\n"); + else + printf("RESERVE mapping.\n"); + + switch (method) { + case HUGETLBFS: + printf("Allocating using HUGETLBFS.\n"); + fd = open(path, O_CREAT | O_RDWR, 0777); + if (fd == -1) + err(1, "Failed to open file."); + + ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, + (private ? MAP_PRIVATE : MAP_SHARED) | + (populate ? MAP_POPULATE : 0) | + (reserve ? 0 : MAP_NORESERVE), + fd, 0); + + if (ptr == MAP_FAILED) { + close(fd); + err(1, "Error mapping the file"); + } + break; + case MMAP_MAP_HUGETLB: + printf("Allocating using MAP_HUGETLB.\n"); + ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, + (private ? (MAP_PRIVATE | MAP_ANONYMOUS) : + MAP_SHARED) | + MAP_HUGETLB | (populate ? MAP_POPULATE : 0) | + (reserve ? 0 : MAP_NORESERVE), + -1, 0); + + if (ptr == MAP_FAILED) + err(1, "mmap"); + + printf("Returned address is %p\n", ptr); + break; + case SHM: + printf("Allocating using SHM.\n"); + shmid = shmget(key, size, + SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W); + if (shmid < 0) { + shmid = shmget(++key, size, + SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W); + if (shmid < 0) + err(1, "shmget"); + } + printf("shmid: 0x%x, shmget key:%d\n", shmid, key); + + ptr = shmat(shmid, NULL, 0); + if (ptr == (int *)-1) { + perror("Shared memory attach failure"); + shmctl(shmid, IPC_RMID, NULL); + exit(2); + } + printf("shmaddr: %p\n", ptr); + + break; + default: + errno = EINVAL; + err(1, "Invalid method."); + } + + if (write) { + printf("Writing to memory.\n"); + memset(ptr, 1, size); + } + + if (want_sleep) { + /* Signal to caller that we're done. */ + printf("DONE\n"); + + /* Hold memory until external kill signal is delivered. */ + while (1) + sleep(100); + } + + if (method == HUGETLBFS) + close(fd); + + return 0; +} |