From 2a912d440c6024148a25850c7c4066b152ec8750 Mon Sep 17 00:00:00 2001 From: Joel Granados Date: Mon, 15 Dec 2025 16:47:37 +0100 Subject: alloc_tag: move memory_allocation_profiling_sysctls into .rodata Remove the change in file mode permissions done before initializing the sysctl. It is not necessary as the writing of the kernel variable will be blocked by the proc_mem_profiling_handler when writing is disallowed (also controlled by mem_profiling_support). Link: https://lkml.kernel.org/r/20251215-jag-alloc_tag_const-v1-1-35ea56a1ce13@kernel.org Signed-off-by: Joel Granados Acked-by: Suren Baghdasaryan Cc: Kent Overstreet Signed-off-by: Andrew Morton --- lib/alloc_tag.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c index 27fee57a5c91..846a5b5b44a4 100644 --- a/lib/alloc_tag.c +++ b/lib/alloc_tag.c @@ -783,7 +783,7 @@ static int proc_mem_profiling_handler(const struct ctl_table *table, int write, } -static struct ctl_table memory_allocation_profiling_sysctls[] = { +static const struct ctl_table memory_allocation_profiling_sysctls[] = { { .procname = "mem_profiling", .data = &mem_alloc_profiling_key, @@ -798,9 +798,6 @@ static struct ctl_table memory_allocation_profiling_sysctls[] = { static void __init sysctl_init(void) { - if (!mem_profiling_support) - memory_allocation_profiling_sysctls[0].mode = 0444; - register_sysctl_init("vm", memory_allocation_profiling_sysctls); } #else /* CONFIG_SYSCTL */ -- cgit v1.2.3 From a98ec863fdedf4940447f32ceda7d937bebd06a2 Mon Sep 17 00:00:00 2001 From: Audra Mitchell Date: Mon, 1 Dec 2025 13:18:48 -0500 Subject: lib/test_vmalloc.c: minor fixes to test_vmalloc.c If PAGE_SIZE is larger than 4k and if you have a system with a large number of CPUs, this test can require a very large amount of memory leading to oom-killer firing. Given the type of allocation, the kernel won't have anything to kill, causing the system to stall. 
Add a parameter to the test_vmalloc driver to represent the number of times a percpu object will be allocated. Calculate this in test_vmalloc.sh to be 90% of available memory or the current default of 35000, whichever is smaller. Link: https://lkml.kernel.org/r/20251201181848.1216197-1-audra@redhat.com Signed-off-by: Audra Mitchell Reviewed-by: Andrew Morton Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Mike Rapoport Cc: Rafael Aquini Cc: Shuah Khan Cc: Suren Baghdasaryan Cc: "Uladzislau Rezki (Sony)" Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/test_vmalloc.c | 11 +++++++---- tools/testing/selftests/mm/test_vmalloc.sh | 31 +++++++++++++++++++++++++++--- 2 files changed, 35 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/test_vmalloc.c b/lib/test_vmalloc.c index 6521c05c7816..270b6f7ca807 100644 --- a/lib/test_vmalloc.c +++ b/lib/test_vmalloc.c @@ -58,6 +58,9 @@ __param(int, run_test_mask, 7, /* Add a new test case description here. */ ); +__param(int, nr_pcpu_objects, 35000, + "Number of pcpu objects to allocate for pcpu_alloc_test"); + /* * This is for synchronization of setup phase. 
*/ @@ -317,24 +320,24 @@ pcpu_alloc_test(void) size_t size, align; int i; - pcpu = vmalloc(sizeof(void __percpu *) * 35000); + pcpu = vmalloc(sizeof(void __percpu *) * nr_pcpu_objects); if (!pcpu) return -1; - for (i = 0; i < 35000; i++) { + for (i = 0; i < nr_pcpu_objects; i++) { size = get_random_u32_inclusive(1, PAGE_SIZE / 4); /* * Maximum PAGE_SIZE */ - align = 1 << get_random_u32_inclusive(1, 11); + align = 1 << get_random_u32_inclusive(1, PAGE_SHIFT - 1); pcpu[i] = __alloc_percpu(size, align); if (!pcpu[i]) rv = -1; } - for (i = 0; i < 35000; i++) + for (i = 0; i < nr_pcpu_objects; i++) free_percpu(pcpu[i]); vfree(pcpu); diff --git a/tools/testing/selftests/mm/test_vmalloc.sh b/tools/testing/selftests/mm/test_vmalloc.sh index d39096723fca..b23d705bf570 100755 --- a/tools/testing/selftests/mm/test_vmalloc.sh +++ b/tools/testing/selftests/mm/test_vmalloc.sh @@ -13,6 +13,9 @@ TEST_NAME="vmalloc" DRIVER="test_${TEST_NAME}" NUM_CPUS=`grep -c ^processor /proc/cpuinfo` +# Default number of times we allocate percpu objects: +NR_PCPU_OBJECTS=35000 + # 1 if fails exitcode=1 @@ -27,6 +30,8 @@ PERF_PARAM="sequential_test_order=1 test_repeat_count=3" SMOKE_PARAM="test_loop_count=10000 test_repeat_count=10" STRESS_PARAM="nr_threads=$NUM_CPUS test_repeat_count=20" +PCPU_OBJ_PARAM="nr_pcpu_objects=$NR_PCPU_OBJECTS" + check_test_requirements() { uid=$(id -u) @@ -47,12 +52,30 @@ check_test_requirements() fi } +check_memory_requirement() +{ + # The pcpu_alloc_test allocates nr_pcpu_objects per cpu. If the + # PAGE_SIZE is on the larger side it is easier to set a value + # that can cause oom events during testing. Since we are + # testing the functionality of vmalloc and not the oom-killer, + # calculate what is 90% of available memory and divide it by + # the number of online CPUs. + pages=$(($(getconf _AVPHYS_PAGES) * 90 / 100 / $NUM_CPUS)) + + if (($pages < $NR_PCPU_OBJECTS)); then + echo "Updated nr_pcpu_objects to 90% of available memory." 
+ echo "nr_pcpu_objects is now set to: $pages." + PCPU_OBJ_PARAM="nr_pcpu_objects=$pages" + fi +} + run_performance_check() { echo "Run performance tests to evaluate how fast vmalloc allocation is." echo "It runs all test cases on one single CPU with sequential order." - modprobe $DRIVER $PERF_PARAM > /dev/null 2>&1 + check_memory_requirement + modprobe $DRIVER $PERF_PARAM $PCPU_OBJ_PARAM > /dev/null 2>&1 echo "Done." echo "Check the kernel message buffer to see the summary." } @@ -63,7 +86,8 @@ run_stability_check() echo "available test cases are run by NUM_CPUS workers simultaneously." echo "It will take time, so be patient." - modprobe $DRIVER $STRESS_PARAM > /dev/null 2>&1 + check_memory_requirement + modprobe $DRIVER $STRESS_PARAM $PCPU_OBJ_PARAM > /dev/null 2>&1 echo "Done." echo "Check the kernel ring buffer to see the summary." } @@ -74,7 +98,8 @@ run_smoke_check() echo "Please check $0 output how it can be used" echo "for deep performance analysis as well as stress testing." - modprobe $DRIVER $SMOKE_PARAM > /dev/null 2>&1 + check_memory_requirement + modprobe $DRIVER $SMOKE_PARAM $PCPU_OBJ_PARAM > /dev/null 2>&1 echo "Done." echo "Check the kernel ring buffer to see the summary." } -- cgit v1.2.3 From 77bcee8d4015a1191e1e3f5c5c51589086493ab0 Mon Sep 17 00:00:00 2001 From: Ran Xiaokai Date: Thu, 15 Jan 2026 03:15:36 +0000 Subject: alloc_tag: fix rw permission issue when handling boot parameter Boot parameters prefixed with "sysctl." are processed during the final stage of system initialization via kernel_init()-> do_sysctl_args(). When CONFIG_MEM_ALLOC_PROFILING_DEBUG is enabled, the sysctl.vm.mem_profiling entry is not writable and will cause a warning. Before run_init_process(), system initialization executes in kernel thread context. Use current->mm to distinguish sysctl writes during do_sysctl_args() from user-space triggered ones. 
When the proc_handler is invoked from do_sysctl_args(), always return success: the same value was already set by setup_early_mem_profiling(), and returning success eliminates a permission-denied warning. Link: https://lkml.kernel.org/r/20260115031536.164254-1-ranxiaokai627@163.com Signed-off-by: Ran Xiaokai Suggested-by: Suren Baghdasaryan Acked-by: Suren Baghdasaryan Cc: Kent Overstreet Signed-off-by: Andrew Morton --- lib/alloc_tag.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c index 846a5b5b44a4..00ae4673a271 100644 --- a/lib/alloc_tag.c +++ b/lib/alloc_tag.c @@ -776,8 +776,22 @@ EXPORT_SYMBOL(page_alloc_tagging_ops); static int proc_mem_profiling_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { - if (!mem_profiling_support && write) - return -EINVAL; + if (write) { + /* + * Call from do_sysctl_args() which is a no-op since the same + * value was already set by setup_early_mem_profiling. + * Return success to avoid warnings from do_sysctl_args(). + */ + if (!current->mm) + return 0; + +#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG + /* User can't toggle profiling while debugging */ + return -EACCES; +#endif + if (!mem_profiling_support) + return -EINVAL; + } return proc_do_static_key(table, write, buffer, lenp, ppos); } @@ -787,11 +801,7 @@ static const struct ctl_table memory_allocation_profiling_sysctls[] = { { .procname = "mem_profiling", .data = &mem_alloc_profiling_key, -#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG - .mode = 0444, -#else .mode = 0644, -#endif .proc_handler = proc_mem_profiling_handler, }, }; -- cgit v1.2.3