summaryrefslogtreecommitdiff
path: root/include/linux/damon.h
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2026-06-19 20:14:34 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2026-06-19 20:14:34 +0300
commita552c81ff4a16738ca5a44a177d552eb38d552ce (patch)
tree82800368fc5bc70e728875edb52777521f082ca8 /include/linux/damon.h
parentc98d767b34574be82b74d77d02264a830ae1cadd (diff)
parente3d8707358ea76b78bdec9928937bb9a797f2c8f (diff)
downloadlinux-a552c81ff4a16738ca5a44a177d552eb38d552ce.tar.xz
Merge tag 'mm-stable-2026-06-18-09-26' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton: - "selftests/mm: clean up build output and verbosity" (Li Wang) Remove some noise from the MM selftests build - "mm: Free contiguous order-0 pages efficiently" (Ryan Roberts) Speed up the freeing of a batch of 0-order pages by first scanning them for coalescing opportunities. This is applicable to vfree() and to the releasing of frozen pages - "mm/damon: introduce DAMOS failed region quota charge ratio" (SeongJae Park) Address a DAMOS usability issue: The DAMOS quota often exhausts prematurely because it charges for all memory attempted, causing slow and inconsistent performance when actions fail on unreclaimable memory. To fix this, a new feature lets users set a smaller, flexible quota charge ratio (via a numerator and denominator) for failed regions. Since failed actions cause less overhead, reducing their quota cost ensures more predictable and efficient DAMOS processing - "selftests/cgroup: improve zswap tests robustness and support large page sizes" (Li Wang) Fix various spurious failures and improves the overall robustness of the cgroup zswap selftests - "fix MAP_DROPPABLE not supported errno" (Anthony Yznaga) Fix an issue in the mlock selftests on arm32 - "mm: huge_memory: clean up defrag sysfs with shared" (Breno Leitao) Some maintenance work in the huge_memory code - "treewide: fixup gfp_t printks" (Brendan Jackman) Use the special vprintf() gfp_t conversion in various places - "mm: Fix vmemmap optimization accounting and initialization" (Muchun Song) Fix several bugs in the vmemmap optimization, mainly around incorrect page accounting and memmap initialization in the DAX and memory hotplug paths. It also fixes pageblock migratetype initialization and struct page initialization for ZONE_DEVICE compound pages - "mm/damon: repost non-hotfix reviewed patches in damon/next tree" A sprinkle of unrelated minor bugfixes for DAMON - "mm: remove page_mapped()" (David Hildenbrand) Remove this function from the tree, replacing it with folio_mapped() - "mm/damon: let DAMON be paused and resumed" (SeongJae Park) Allow DAMON to be paused and resumed without losing its current state - "kasan: hw_tags: Disable tagging for stack and page-tables" (Muhammad Usama Anjum) Simplify and speed up kasan by removing its ineffective tagging of stacks and page tables - "mm/damon/reclaim,lru_sort: monitor all system rams by default" (SeongJae Park) Simplify deployment on diverse hardware like NUMA systems by updating DAMON_RECLAIM and DAMON_LRU_SORT to automatically monitor the physical address range covering all System RAM areas by default, replacing the overly restrictive behavior that only targeted the single largest memory block to save on negligible overhead - "mm/damon/sysfs: document filters/ directory as deprecated" (SeongJae Park) Update some DAMON docs - "mm: use spinlock guards for zone lock" (Dmitry Ilvokhin) Switch zone->lock handling over to using the guard() mechanisms - "mm/filemap: tighten mmap_miss hit accounting" (fujunjie) Fix a flaw where the mmap_miss counter over-credited page cache hits during fault-arounds and page-fault retries. This results in significant reduction of redundant synchronous mmap readahead I/O, drastically cutting down execution time and gigabytes read for sparse random or strided memory access workloads - "selftests/cgroup: Fix false positive failures in test_percpu_basic" (Li Wang) Fix a couple of false-positives in the cgroup kmem selftests - "mm/damon/reclaim: support monitoring intervals auto-tuning" (SeongJae Park) Add a new parameter to DAMON permitting DAMON_RECLAIM to automatically tune DAMON's sampling and aggregation intervals - "mm/damon/stat: add kdamond_pid parameter" (SeongJae Park) Change DAMON_STAT to provide the pid of its kdamond - "mm/kmemleak: dedupe verbose scan output" (Breno Leitao) Remove large amounts of duplicated backtraces from the verbose-mode kmemleak output - "mm: remove CONFIG_HAVE_BOOTMEM_INFO_NODE (Part 1)" (David Hildenbrand) Reduce our use of CONFIG_HAVE_BOOTMEM_INFO_NODE, with a view to removing it entirely in a later series - "mm/damon: validate min_region_size to be power of 2" (Liew Rui Yan) Prevent users from passing a non-power-of-2 value of `addr_unit', as this later results in undesirable behavior - "mm: document read_pages and simplify usage" (Frederick Mayle) - "tools/mm/page-types: Fix misc bugs" (Ye Liu) Fix three issues in tools/mm/page-types.c - "mm: misc cleanups from __GFP_UNMAPPED series" (Brendan Jackman) Implement several cleanups in the page allocator and related code - "mm, swap: swap table phase IV: unify allocation" (Kairui Song) Unify the allocation and charging of anon and shmem swap in folios, provides better synchronization, consolidates the metadata management, hence dropping the static array and map, and improves performance - "mm/damon: introduce data attributes monitoring" (SeongJae Park( Extend DAMON to monitor general data attributes other than accesses - "mm/vmalloc: free unused pages on vrealloc() shrink" (Shivam Kalra) Implement the TODO in vrealloc() to unmap and free unused pages when shrinking across a page boundary - "mm/damon: documentation and comment fixes" (niecheng) - "remove mmap_action success, error hooks" (Lorenzo Stoakes) Eliminate custom hooks from mmap_action by removing the problematic success_hook which allowed drivers to improperly access uninitialized VMAs. It replaces the error_hook with a simple error-code field and updates the memory char driver accordingly - "mm/damon: minor improvements for code readability and tests" (SeongJae Park) - "mm/damon: fix macro arguments and clarify quota goals doc" (Maksym Shcherba) - "userfaultfd: merge fs/userfaultfd.c into mm/userfaultfd.c" (Mike Rapoport) - "mm/mglru: improve reclaim loop and dirty folio" (Kairui Song and others) Clean up and slightly improves MGLRU's reclaim loop and dirty writeback handling. Large performance improvements are measured - "use vma locks for proc/pid/{smaps|numa_maps} reads" (Suren Baghdasaryan) Use per-vma locks when reading /proc/pid/smaps and numa_maps similar to reduce contention on central mmap_lock - "refactors thpsize_shmem_enabled_store() and thpsize_shmem_enabled_show()" (Ran Xiaokai) Some cleanup work in the THP code - "selftests/memfd: fix compilation warnings" (Konstantin Khorenko) Fix a few build glitches in the memfd selftest code. - "memcg: shrink obj_stock_pcp and cache multiple objcgs" (Shakeel Butt) Resolve a 68% performance regression caused by NUMA-node cache thrashing around struct obj_stock_pcp by shrinking its existing fields and expanding it into a multi-slot array that caches up to five obj_cgroup pointers per CPU, allowing per-node variants of the same memcg to coexist within a single 64-byte cache line. - "zram: writeback fixes" (Sergey Senozhatsky) address a couple of unrelated zram writeback issues - "mm: switch THP shrinker to list_lru" (Johannes Weiner) Resolve NUMA-awareness issues and streamlines callsite interaction by refactoring and extending the list_lru API to completely replace the complex, open-coded deferred split queue for Transparent Huge Pages - "mm: improve large folio readahead for exec memory" (Usama Arif) Improve large-folio readahead on systems like 64K-page arm64 by preventing the mmap_miss check from permanently disabling target-oriented VM_EXEC readahead, and by generalizing the force_thp_readahead gate to support mappings with any usefully large maximum folio order under the cache cap. - "userfaultfd/pagemap: pre-existing fixes" (Kiryl Shutsemau) Fix a bunch of minor issues in the userfaultfd/pagemap, all of which were flagged by Sashiko review of proposed new material - "mm/sparse-vmemmap: Provide generic vmemmap_set_pmd() and vmemmap_check_pmd()" (Muchun Song) Provide generic versions of these two functions so the four arch-specific implementations can be removed. - "mm/swap, PM: hibernate: fix swapoff race in uswsusp by pinning swap device" (Youngjun Park) Address a uswsusp-vs-swapoff race and reduces the swap device reference taking/releasing frequency. - "mm/hmm: A fix and a selftest" (Dev Jain) * tag 'mm-stable-2026-06-18-09-26' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (321 commits) selftests/mm/hmm-tests: test pagemap reads of PMD device-private entries fs/proc/task_mmu: do not warn on seeing non-migration pmd entry lib/test_hmm: check alloc_page_vma() return value and handle OOM mm/compaction: cap compact_gap() at COMPACT_CLUSTER_MAX mm/swap: remove redundant swap device reference in alloc/free mm/swap, PM: hibernate: fix swapoff race in uswsusp by pinning swap device mm/filemap: use folio_next_index() for start vmalloc: fix NULL pointer dereference in is_vm_area_hugepages() sparc/mm: drop vmemmap_check_pmd helper and use generic code loongarch/mm: drop vmemmap_check_pmd helper and use generic code riscv/mm: drop vmemmap_pmd helpers and use generic code arm64/mm: drop vmemmap_pmd helpers and use generic code mm/sparse-vmemmap: provide generic vmemmap_set_pmd() and vmemmap_check_pmd() rust: page: mark Page::nid as inline userfaultfd: build __VMA_UFFD_FLAGS from config-gated masks userfaultfd: gate must_wait writability check on pte_present() mm/huge_memory: preserve pmd_swp_uffd_wp on device-private PMD downgrade fs/proc/task_mmu: fix hugetlb self-deadlock in pagemap_scan_pte_hole() fs/proc/task_mmu: use huge_page_size() in pagemap_scan_hugetlb_entry() fs/proc/task_mmu: fix make_uffd_wp_huge_pte() prot-update race ...
Diffstat (limited to 'include/linux/damon.h')
-rw-r--r--include/linux/damon.h136
1 files changed, 111 insertions, 25 deletions
diff --git a/include/linux/damon.h b/include/linux/damon.h
index f2cdb7c3f5e6..6f7edb3590ef 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -8,23 +8,20 @@
#ifndef _DAMON_H_
#define _DAMON_H_
+#include <linux/math64.h>
#include <linux/memcontrol.h>
#include <linux/mutex.h>
+#include <linux/prandom.h>
#include <linux/time64.h>
#include <linux/types.h>
-#include <linux/random.h>
/* Minimal region size. Every damon_region is aligned by this. */
#define DAMON_MIN_REGION_SZ PAGE_SIZE
+/* Maximum number of monitoring probes. */
+#define DAMON_MAX_PROBES (4)
/* Max priority score for DAMON-based operation schemes */
#define DAMOS_MAX_SCORE (99)
-/* Get a random number in [l, r) */
-static inline unsigned long damon_rand(unsigned long l, unsigned long r)
-{
- return l + get_random_u32_below(r - l);
-}
-
/**
* struct damon_addr_range - Represents an address region of [@start, @end).
* @start: Start address of the region (inclusive).
@@ -52,6 +49,7 @@ struct damon_size_range {
* @nr_accesses: Access frequency of this region.
* @nr_accesses_bp: @nr_accesses in basis point (0.01%) that updated for
* each sampling interval.
+ * @probe_hits: Number of probe-positive region samples.
* @list: List head for siblings.
* @age: Age of this region.
*
@@ -80,6 +78,7 @@ struct damon_region {
unsigned long sampling_addr;
unsigned int nr_accesses;
unsigned int nr_accesses_bp;
+ unsigned char probe_hits[DAMON_MAX_PROBES];
struct list_head list;
unsigned int age;
@@ -121,6 +120,7 @@ struct damon_target {
* @DAMOS_PAGEOUT: Reclaim the region.
* @DAMOS_HUGEPAGE: Call ``madvise()`` for the region with MADV_HUGEPAGE.
* @DAMOS_NOHUGEPAGE: Call ``madvise()`` for the region with MADV_NOHUGEPAGE.
+ * @DAMOS_COLLAPSE: Call ``madvise()`` for the region with MADV_COLLAPSE.
* @DAMOS_LRU_PRIO: Prioritize the region on its LRU lists.
* @DAMOS_LRU_DEPRIO: Deprioritize the region on its LRU lists.
* @DAMOS_MIGRATE_HOT: Migrate the regions prioritizing warmer regions.
@@ -140,6 +140,7 @@ enum damos_action {
DAMOS_PAGEOUT,
DAMOS_HUGEPAGE,
DAMOS_NOHUGEPAGE,
+ DAMOS_COLLAPSE,
DAMOS_LRU_PRIO,
DAMOS_LRU_DEPRIO,
DAMOS_MIGRATE_HOT,
@@ -159,6 +160,8 @@ enum damos_action {
* @DAMOS_QUOTA_NODE_MEMCG_FREE_BP: MemFree ratio of a node for a cgroup.
* @DAMOS_QUOTA_ACTIVE_MEM_BP: Active to total LRU memory ratio.
* @DAMOS_QUOTA_INACTIVE_MEM_BP: Inactive to total LRU memory ratio.
+ * @DAMOS_QUOTA_NODE_ELIGIBLE_MEM_BP: Scheme-eligible memory ratio of a
+ * node in basis points (0-10000).
* @NR_DAMOS_QUOTA_GOAL_METRICS: Number of DAMOS quota goal metrics.
*
* Metrics equal to larger than @NR_DAMOS_QUOTA_GOAL_METRICS are unsupported.
@@ -172,6 +175,7 @@ enum damos_quota_goal_metric {
DAMOS_QUOTA_NODE_MEMCG_FREE_BP,
DAMOS_QUOTA_ACTIVE_MEM_BP,
DAMOS_QUOTA_INACTIVE_MEM_BP,
+ DAMOS_QUOTA_NODE_ELIGIBLE_MEM_BP,
NR_DAMOS_QUOTA_GOAL_METRICS,
};
@@ -233,6 +237,8 @@ enum damos_quota_goal_tuner {
* @goals: Head of quota tuning goals (&damos_quota_goal) list.
* @goal_tuner: Goal-based @esz tuning algorithm to use.
* @esz: Effective size quota in bytes.
+ * @fail_charge_num: Failed regions charge rate numerator.
+ * @fail_charge_denom: Failed regions charge rate denominator.
*
* @weight_sz: Weight of the region's size for prioritization.
* @weight_nr_accesses: Weight of the region's nr_accesses for prioritization.
@@ -262,6 +268,10 @@ enum damos_quota_goal_tuner {
*
* The resulting effective size quota in bytes is set to @esz.
*
+ * For DAMOS action applying failed amount of regions, charging those same to
+ * those that the action has successfully applied may be unfair. For the
+ * reason, 'the size * @fail_charge_num / @fail_charge_denom' is charged.
+ *
* For selecting regions within the quota, DAMON prioritizes current scheme's
* target memory regions using the &struct damon_operations->get_scheme_score.
* You could customize the prioritization logic by setting &weight_sz,
@@ -276,6 +286,9 @@ struct damos_quota {
enum damos_quota_goal_tuner goal_tuner;
unsigned long esz;
+ unsigned int fail_charge_num;
+ unsigned int fail_charge_denom;
+
unsigned int weight_sz;
unsigned int weight_nr_accesses;
unsigned int weight_age;
@@ -617,6 +630,7 @@ enum damon_ops_id {
* @update: Update operations-related data structures.
* @prepare_access_checks: Prepare next access check of target regions.
* @check_accesses: Check the accesses to target regions.
+ * @apply_probes: Apply probes for each region.
* @get_scheme_score: Get the score of a region for a scheme.
* @apply_scheme: Apply a DAMON-based operation scheme.
* @target_valid: Determine if the target is valid.
@@ -643,6 +657,8 @@ enum damon_ops_id {
* last preparation and update the number of observed accesses of each region.
* It should also return max number of observed accesses that made as a result
* of its update. The value will be used for regions adjustment threshold.
+ * @apply_probes should apply the data attribute probes to each region and
+ * accordingly update the probe hits counter of the region.
* @get_scheme_score should return the priority score of a region for a scheme
* as an integer in [0, &DAMOS_MAX_SCORE].
* @apply_scheme is called from @kdamond when a region for user provided
@@ -660,6 +676,7 @@ struct damon_operations {
void (*update)(struct damon_ctx *context);
void (*prepare_access_checks)(struct damon_ctx *context);
unsigned int (*check_accesses)(struct damon_ctx *context);
+ void (*apply_probes)(struct damon_ctx *context);
int (*get_scheme_score)(struct damon_ctx *context,
struct damon_region *r, struct damos *scheme);
unsigned long (*apply_scheme)(struct damon_ctx *context,
@@ -722,6 +739,47 @@ struct damon_intervals_goal {
};
/**
+ * enum damon_filter_type - Type of &struct damon_filter
+ *
+ * @DAMON_FILTER_TYPE_ANON: Anonymous pages.
+ * @DAMON_FILTER_TYPE_MEMCG: Specific memcg's pages.
+ */
+enum damon_filter_type {
+ DAMON_FILTER_TYPE_ANON,
+ DAMON_FILTER_TYPE_MEMCG,
+};
+
+/**
+ * struct damon_filter - DAMON region filter for &struct damon_probe.
+ *
+ * @type: Type of the region.
+ * @matching: Whether this filter is for the type-matching ones.
+ * @allow: Whether the @type-@matching ones should pass this filter.
+ * @memcg_id: Memcg id of the question if @type is DAMON_FILTER_MEMCG.
+ * @list: Siblings list.
+ */
+struct damon_filter {
+ enum damon_filter_type type;
+ bool matching;
+ bool allow;
+ union {
+ u64 memcg_id;
+ };
+ struct list_head list;
+};
+
+/**
+ * struct damon_probe - Data region attribute probe.
+ *
+ * @filters: Filters for assessing if a given region is for this probe.
+ * @list: Siblings list.
+ */
+struct damon_probe {
+ struct list_head filters;
+ struct list_head list;
+};
+
+/**
* struct damon_attrs - Monitoring attributes for accuracy/overhead control.
*
* @sample_interval: The time between access samplings.
@@ -787,6 +845,7 @@ struct damon_attrs {
* @ops: Set of monitoring operations for given use cases.
* @addr_unit: Scale factor for core to ops address conversion.
* @min_region_sz: Minimum region size.
+ * @pause: Pause kdamond main loop.
* @adaptive_targets: Head of monitoring targets (&damon_target) list.
* @schemes: Head of schemes (&damos) list.
*/
@@ -838,13 +897,34 @@ struct damon_ctx {
/* public: */
struct damon_operations ops;
+ struct list_head probes;
unsigned long addr_unit;
unsigned long min_region_sz;
+ bool pause;
struct list_head adaptive_targets;
struct list_head schemes;
+
+ /* Per-ctx PRNG state for damon_rand(); kdamond is the sole consumer. */
+ struct rnd_state rnd_state;
};
+/* Get a random number in [@l, @r) using @ctx's lockless PRNG. */
+static inline unsigned long damon_rand(struct damon_ctx *ctx,
+ unsigned long l, unsigned long r)
+{
+ unsigned long span = r - l;
+ u64 rnd;
+
+ if (span <= U32_MAX) {
+ rnd = prandom_u32_state(&ctx->rnd_state);
+ return l + (unsigned long)((rnd * span) >> 32);
+ }
+ rnd = ((u64)prandom_u32_state(&ctx->rnd_state) << 32) |
+ prandom_u32_state(&ctx->rnd_state);
+ return l + mul_u64_u64_shr(rnd, span, 64);
+}
+
static inline struct damon_region *damon_next_region(struct damon_region *r)
{
return container_of(r->list.next, struct damon_region, list);
@@ -870,15 +950,26 @@ static inline unsigned long damon_sz_region(struct damon_region *r)
return r->ar.end - r->ar.start;
}
+#define damon_for_each_filter(f, p) \
+ list_for_each_entry(f, &(p)->filters, list)
+
+#define damon_for_each_filter_safe(f, next, p) \
+ list_for_each_entry_safe(f, next, &(p)->filters, list)
+
+#define damon_for_each_probe(p, ctx) \
+ list_for_each_entry(p, &(ctx)->probes, list)
+
+#define damon_for_each_probe_safe(p, next, ctx) \
+ list_for_each_entry_safe(p, next, &(ctx)->probes, list)
#define damon_for_each_region(r, t) \
- list_for_each_entry(r, &t->regions_list, list)
+ list_for_each_entry(r, &(t)->regions_list, list)
#define damon_for_each_region_from(r, t) \
- list_for_each_entry_from(r, &t->regions_list, list)
+ list_for_each_entry_from(r, &(t)->regions_list, list)
#define damon_for_each_region_safe(r, next, t) \
- list_for_each_entry_safe(r, next, &t->regions_list, list)
+ list_for_each_entry_safe(r, next, &(t)->regions_list, list)
#define damon_for_each_target(t, ctx) \
list_for_each_entry(t, &(ctx)->adaptive_targets, list)
@@ -893,7 +984,7 @@ static inline unsigned long damon_sz_region(struct damon_region *r)
list_for_each_entry_safe(s, next, &(ctx)->schemes, list)
#define damos_for_each_quota_goal(goal, quota) \
- list_for_each_entry(goal, &quota->goals, list)
+ list_for_each_entry(goal, &(quota)->goals, list)
#define damos_for_each_quota_goal_safe(goal, next, quota) \
list_for_each_entry_safe(goal, next, &(quota)->goals, list)
@@ -912,21 +1003,16 @@ static inline unsigned long damon_sz_region(struct damon_region *r)
#ifdef CONFIG_DAMON
-struct damon_region *damon_new_region(unsigned long start, unsigned long end);
+struct damon_filter *damon_new_filter(enum damon_filter_type type,
+ bool matching, bool allow);
+void damon_add_filter(struct damon_probe *probe, struct damon_filter *f);
+void damon_destroy_filter(struct damon_filter *f);
-/*
- * Add a region between two other regions
- */
-static inline void damon_insert_region(struct damon_region *r,
- struct damon_region *prev, struct damon_region *next,
- struct damon_target *t)
-{
- __list_add(&r->list, &prev->list, &next->list);
- t->nr_regions++;
-}
+struct damon_probe *damon_new_probe(void);
+void damon_add_probe(struct damon_ctx *ctx, struct damon_probe *probe);
+
+struct damon_region *damon_new_region(unsigned long start, unsigned long end);
-void damon_add_region(struct damon_region *r, struct damon_target *t);
-void damon_destroy_region(struct damon_region *r, struct damon_target *t);
int damon_set_regions(struct damon_target *t, struct damon_addr_range *ranges,
unsigned int nr_ranges, unsigned long min_region_sz);
void damon_update_region_access_rate(struct damon_region *r, bool accessed,
@@ -994,7 +1080,7 @@ int damon_kdamond_pid(struct damon_ctx *ctx);
int damon_call(struct damon_ctx *ctx, struct damon_call_control *control);
int damos_walk(struct damon_ctx *ctx, struct damos_walk_control *control);
-int damon_set_region_biggest_system_ram_default(struct damon_target *t,
+int damon_set_region_system_rams_default(struct damon_target *t,
unsigned long *start, unsigned long *end,
unsigned long addr_unit,
unsigned long min_region_sz);