diff options
author | Ankur Arora <ankur.a.arora@oracle.com> | 2025-09-17 18:24:11 +0300 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2025-09-19 18:43:59 +0300 |
commit | a8f0992998af9ea1135ee6415c68c1d84cb5ad22 (patch) | |
tree | da51f8393ca48c82b9f5bf0a80cc2ba0d69c395a /tools/perf/bench/mem-functions.c | |
parent | c3047f9a1ab457b60caa3b2baa2c605b935ca4f1 (diff) | |
download | linux-a8f0992998af9ea1135ee6415c68c1d84cb5ad22.tar.xz |
perf bench mem: Add mmap() workloads
Add two mmap() workloads: one that eagerly populates a region and
another that demand faults it in.
The intent is to probe the memory subsystem performance incurred
by mmap().
$ perf bench mem mmap -s 4gb -p 4kb -l 10 -f populate
# Running 'mem/mmap' benchmark:
# function 'populate' (Eagerly populated mmap())
# Copying 4gb bytes ...
1.811691 GB/sec
$ perf bench mem mmap -s 4gb -p 2mb -l 10 -f populate
# Running 'mem/mmap' benchmark:
# function 'populate' (Eagerly populated mmap())
# Copying 4gb bytes ...
12.272017 GB/sec
$ perf bench mem mmap -s 4gb -p 1gb -l 10 -f populate
# Running 'mem/mmap' benchmark:
# function 'populate' (Eagerly populated mmap())
# Copying 4gb bytes ...
17.085927 GB/sec
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Mateusz Guzik <mjguzik@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Raghavendra K T <raghavendra.kt@amd.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/bench/mem-functions.c')
-rw-r--r-- | tools/perf/bench/mem-functions.c | 96 |
1 files changed, 96 insertions, 0 deletions
diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c
index 2a23bed8c2d3..2908a3a796c9 100644
--- a/tools/perf/bench/mem-functions.c
+++ b/tools/perf/bench/mem-functions.c
@@ -40,6 +40,7 @@ static const char *chunk_size_str = "0";
 static unsigned int nr_loops = 1;
 static bool use_cycles;
 static int cycles_fd;
+static unsigned int seed;
 
 static const struct option bench_common_options[] = {
 	OPT_STRING('s', "size", &size_str, "1MB",
@@ -81,6 +82,7 @@ struct bench_params {
 	size_t chunk_size;
 	unsigned int nr_loops;
 	unsigned int page_shift;
+	unsigned int seed;
 };
 
 struct bench_mem_info {
@@ -98,6 +100,7 @@ typedef void (*mem_fini_t)(struct bench_mem_info *, struct bench_params *,
 			   void **, void **);
 typedef void *(*memcpy_t)(void *, const void *, size_t);
 typedef void *(*memset_t)(void *, int, size_t);
+typedef void (*mmap_op_t)(void *, size_t, unsigned int, bool);
 
 struct function {
 	const char *name;
@@ -108,6 +111,7 @@ struct function {
 		union {
 			memcpy_t memcpy;
 			memset_t memset;
+			mmap_op_t mmap_op;
 		};
 	} fn;
 };
@@ -160,6 +164,14 @@ static union bench_clock clock_diff(union bench_clock *s, union bench_clock *e)
 	return t;
 }
 
+static void clock_accum(union bench_clock *a, union bench_clock *b)
+{
+	if (use_cycles)
+		a->cycles += b->cycles;
+	else
+		timeradd(&a->tv, &b->tv, &a->tv);
+}
+
 static double timeval2double(struct timeval *ts)
 {
 	return (double)ts->tv_sec + (double)ts->tv_usec / (double)USEC_PER_SEC;
@@ -271,6 +283,8 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *
 	}
 	p.page_shift = ilog2(page_size);
 
+	p.seed = seed;
+
 	if (!strncmp(function_str, "all", 3)) {
 		for (i = 0; info->functions[i].name; i++)
 			__bench_mem_function(info, &p, i);
@@ -465,3 +479,85 @@ int bench_mem_memset(int argc, const char **argv)
 
 	return bench_mem_common(argc, argv, &info);
 }
+
+static void mmap_page_touch(void *dst, size_t size, unsigned int page_shift, bool random)
+{
+	unsigned long npages = size / (1 << page_shift);
+	unsigned long offset = 0, r = 0;
+
+	for (unsigned long i = 0; i < npages; i++) {
+		if (random)
+			r = rand() % (1 << page_shift);
+
+		*((char *)dst + offset + r) = *(char *)(dst + offset + r) + i;
+		offset += 1 << page_shift;
+	}
+}
+
+static int do_mmap(const struct function *r, struct bench_params *p,
+		  void *src __maybe_unused, void *dst __maybe_unused,
+		  union bench_clock *accum)
+{
+	union bench_clock start, end, diff;
+	mmap_op_t fn = r->fn.mmap_op;
+	bool populate = strcmp(r->name, "populate") == 0;
+
+	if (p->seed)
+		srand(p->seed);
+
+	for (unsigned int i = 0; i < p->nr_loops; i++) {
+		clock_get(&start);
+		dst = bench_mmap(p->size, populate, p->page_shift);
+		if (!dst)
+			goto out;
+
+		fn(dst, p->size, p->page_shift, p->seed);
+		clock_get(&end);
+		diff = clock_diff(&start, &end);
+		clock_accum(accum, &diff);
+
+		bench_munmap(dst, p->size);
+	}
+
+	return 0;
+out:
+	printf("# Memory allocation failed - maybe size (%s) %s?\n", size_str,
+	       p->page_shift != PAGE_SHIFT_4KB ? "has insufficient hugepages" : "is too large");
+	return -1;
+}
+
+static const char * const bench_mem_mmap_usage[] = {
+	"perf bench mem mmap <options>",
+	NULL
+};
+
+static const struct function mmap_functions[] = {
+	{ .name = "demand",
+	  .desc = "Demand loaded mmap()",
+	  .fn.mmap_op = mmap_page_touch },
+
+	{ .name = "populate",
+	  .desc = "Eagerly populated mmap()",
+	  .fn.mmap_op = mmap_page_touch },
+
+	{ .name = NULL, }
+};
+
+int bench_mem_mmap(int argc, const char **argv)
+{
+	static const struct option bench_mmap_options[] = {
+		OPT_UINTEGER('r', "randomize", &seed,
+			    "Seed to randomize page access offset."),
+		OPT_PARENT(bench_common_options),
+		OPT_END()
+	};
+
+	struct bench_mem_info info = {
+		.functions = mmap_functions,
+		.do_op = do_mmap,
+		.usage = bench_mem_mmap_usage,
+		.options = bench_mmap_options,
+	};
+
+	return bench_mem_common(argc, argv, &info);
+}