summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- include/linux/mmzone.h 19
-rw-r--r-- include/trace/events/vmscan.h 51
-rw-r--r-- mm/memory-tiers.c 2
-rw-r--r-- mm/page_alloc.c 4
-rw-r--r-- mm/show_mem.c 3
-rw-r--r-- mm/vmscan.c 29
-rw-r--r-- mm/vmstat.c 2
7 files changed, 91 insertions, 19 deletions
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 8881198e85c6..3e51190a55e4 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1534,16 +1534,27 @@ static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat)
#include <linux/memory_hotplug.h>
void build_all_zonelists(pg_data_t *pgdat);
-void wakeup_kswapd(struct zone *zone, gfp_t gfp_mask, int order,
- enum zone_type highest_zoneidx);
-void kswapd_try_clear_hopeless(struct pglist_data *pgdat,
- unsigned int order, int highest_zoneidx);
bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
int highest_zoneidx, unsigned int alloc_flags,
long free_pages);
bool zone_watermark_ok(struct zone *z, unsigned int order,
unsigned long mark, int highest_zoneidx,
unsigned int alloc_flags);
+
+enum kswapd_clear_hopeless_reason {
+ KSWAPD_CLEAR_HOPELESS_OTHER = 0,
+ KSWAPD_CLEAR_HOPELESS_KSWAPD,
+ KSWAPD_CLEAR_HOPELESS_DIRECT,
+ KSWAPD_CLEAR_HOPELESS_PCP,
+};
+
+void wakeup_kswapd(struct zone *zone, gfp_t gfp_mask, int order,
+ enum zone_type highest_zoneidx);
+void kswapd_try_clear_hopeless(struct pglist_data *pgdat,
+ unsigned int order, int highest_zoneidx);
+void kswapd_clear_hopeless(pg_data_t *pgdat, enum kswapd_clear_hopeless_reason reason);
+bool kswapd_test_hopeless(pg_data_t *pgdat);
+
/*
* Memory initialization context, use to differentiate memory added by
* the platform statically or via memory hotplug interface.
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index 490958fa10de..ea58e4656abf 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -40,6 +40,16 @@
{_VMSCAN_THROTTLE_CONGESTED, "VMSCAN_THROTTLE_CONGESTED"} \
) : "VMSCAN_THROTTLE_NONE"
+TRACE_DEFINE_ENUM(KSWAPD_CLEAR_HOPELESS_OTHER);
+TRACE_DEFINE_ENUM(KSWAPD_CLEAR_HOPELESS_KSWAPD);
+TRACE_DEFINE_ENUM(KSWAPD_CLEAR_HOPELESS_DIRECT);
+TRACE_DEFINE_ENUM(KSWAPD_CLEAR_HOPELESS_PCP);
+
+#define kswapd_clear_hopeless_reason_ops \
+ {KSWAPD_CLEAR_HOPELESS_KSWAPD, "KSWAPD"}, \
+ {KSWAPD_CLEAR_HOPELESS_DIRECT, "DIRECT"}, \
+ {KSWAPD_CLEAR_HOPELESS_PCP, "PCP"}, \
+ {KSWAPD_CLEAR_HOPELESS_OTHER, "OTHER"}
#define trace_reclaim_flags(file) ( \
(file ? RECLAIM_WB_FILE : RECLAIM_WB_ANON) | \
@@ -535,6 +545,47 @@ TRACE_EVENT(mm_vmscan_throttled,
__entry->usec_delayed,
show_throttle_flags(__entry->reason))
);
+
+TRACE_EVENT(mm_vmscan_kswapd_reclaim_fail,
+
+ TP_PROTO(int nid, int failures),
+
+ TP_ARGS(nid, failures),
+
+ TP_STRUCT__entry(
+ __field(int, nid)
+ __field(int, failures)
+ ),
+
+ TP_fast_assign(
+ __entry->nid = nid;
+ __entry->failures = failures;
+ ),
+
+ TP_printk("nid=%d failures=%d",
+ __entry->nid, __entry->failures)
+);
+
+TRACE_EVENT(mm_vmscan_kswapd_clear_hopeless,
+
+ TP_PROTO(int nid, int reason),
+
+ TP_ARGS(nid, reason),
+
+ TP_STRUCT__entry(
+ __field(int, nid)
+ __field(int, reason)
+ ),
+
+ TP_fast_assign(
+ __entry->nid = nid;
+ __entry->reason = reason;
+ ),
+
+ TP_printk("nid=%d reason=%s",
+ __entry->nid,
+ __print_symbolic(__entry->reason, kswapd_clear_hopeless_reason_ops))
+);
#endif /* _TRACE_VMSCAN_H */
/* This part must be outside protection */
diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index 7ec442776574..0ae8bec86346 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -955,7 +955,7 @@ static ssize_t demotion_enabled_store(struct kobject *kobj,
struct pglist_data *pgdat;
for_each_online_pgdat(pgdat)
- atomic_set(&pgdat->kswapd_failures, 0);
+ kswapd_clear_hopeless(pgdat, KSWAPD_CLEAR_HOPELESS_OTHER);
}
return count;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e779b18168de..2c70ba9d5cc6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2945,9 +2945,9 @@ static bool free_frozen_page_commit(struct zone *zone,
* 'hopeless node' to stay in that state for a while. Let
* kswapd work again by resetting kswapd_failures.
*/
- if (atomic_read(&pgdat->kswapd_failures) >= MAX_RECLAIM_RETRIES &&
+ if (kswapd_test_hopeless(pgdat) &&
next_memory_node(pgdat->node_id) < MAX_NUMNODES)
- atomic_set(&pgdat->kswapd_failures, 0);
+ kswapd_clear_hopeless(pgdat, KSWAPD_CLEAR_HOPELESS_PCP);
}
return ret;
}
diff --git a/mm/show_mem.c b/mm/show_mem.c
index 3a4b5207635d..24078ac3e6bc 100644
--- a/mm/show_mem.c
+++ b/mm/show_mem.c
@@ -278,8 +278,7 @@ static void show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_z
#endif
K(node_page_state(pgdat, NR_PAGETABLE)),
K(node_page_state(pgdat, NR_SECONDARY_PAGETABLE)),
- str_yes_no(atomic_read(&pgdat->kswapd_failures) >=
- MAX_RECLAIM_RETRIES),
+ str_yes_no(kswapd_test_hopeless(pgdat)),
K(node_page_state(pgdat, NR_BALLOON_PAGES)));
}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5d9b1bce6f01..1d281174164e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -506,7 +506,7 @@ static bool skip_throttle_noprogress(pg_data_t *pgdat)
* If kswapd is disabled, reschedule if necessary but do not
* throttle as the system is likely near OOM.
*/
- if (atomic_read(&pgdat->kswapd_failures) >= MAX_RECLAIM_RETRIES)
+ if (kswapd_test_hopeless(pgdat))
return true;
/*
@@ -6437,7 +6437,7 @@ static bool allow_direct_reclaim(pg_data_t *pgdat)
int i;
bool wmark_ok;
- if (atomic_read(&pgdat->kswapd_failures) >= MAX_RECLAIM_RETRIES)
+ if (kswapd_test_hopeless(pgdat))
return true;
for_each_managed_zone_pgdat(zone, pgdat, i, ZONE_NORMAL) {
@@ -6846,7 +6846,7 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order,
wake_up_all(&pgdat->pfmemalloc_wait);
/* Hopeless node, leave it to direct reclaim */
- if (atomic_read(&pgdat->kswapd_failures) >= MAX_RECLAIM_RETRIES)
+ if (kswapd_test_hopeless(pgdat))
return true;
if (pgdat_balanced(pgdat, order, highest_zoneidx)) {
@@ -7111,8 +7111,11 @@ restart:
* watermark_high at this point. We need to avoid increasing the
* failure count to prevent the kswapd thread from stopping.
*/
- if (!sc.nr_reclaimed && !boosted)
- atomic_inc(&pgdat->kswapd_failures);
+ if (!sc.nr_reclaimed && !boosted) {
+ int fail_cnt = atomic_inc_return(&pgdat->kswapd_failures);
+ /* kswapd context, low overhead to trace every failure */
+ trace_mm_vmscan_kswapd_reclaim_fail(pgdat->node_id, fail_cnt);
+ }
out:
clear_reclaim_active(pgdat, highest_zoneidx);
@@ -7371,7 +7374,7 @@ void wakeup_kswapd(struct zone *zone, gfp_t gfp_flags, int order,
return;
/* Hopeless node, leave it to direct reclaim if possible */
- if (atomic_read(&pgdat->kswapd_failures) >= MAX_RECLAIM_RETRIES ||
+ if (kswapd_test_hopeless(pgdat) ||
(pgdat_balanced(pgdat, order, highest_zoneidx) &&
!pgdat_watermark_boosted(pgdat, highest_zoneidx))) {
/*
@@ -7391,9 +7394,11 @@ void wakeup_kswapd(struct zone *zone, gfp_t gfp_flags, int order,
wake_up_interruptible(&pgdat->kswapd_wait);
}
-static void kswapd_clear_hopeless(pg_data_t *pgdat)
+void kswapd_clear_hopeless(pg_data_t *pgdat, enum kswapd_clear_hopeless_reason reason)
{
- atomic_set(&pgdat->kswapd_failures, 0);
+ /* Only trace actual resets, not redundant zero-to-zero */
+ if (atomic_xchg(&pgdat->kswapd_failures, 0))
+ trace_mm_vmscan_kswapd_clear_hopeless(pgdat->node_id, reason);
}
/*
@@ -7406,7 +7411,13 @@ void kswapd_try_clear_hopeless(struct pglist_data *pgdat,
unsigned int order, int highest_zoneidx)
{
if (pgdat_balanced(pgdat, order, highest_zoneidx))
- kswapd_clear_hopeless(pgdat);
+ kswapd_clear_hopeless(pgdat, current_is_kswapd() ?
+ KSWAPD_CLEAR_HOPELESS_KSWAPD : KSWAPD_CLEAR_HOPELESS_DIRECT);
+}
+
+bool kswapd_test_hopeless(pg_data_t *pgdat)
+{
+ return atomic_read(&pgdat->kswapd_failures) >= MAX_RECLAIM_RETRIES;
}
#ifdef CONFIG_HIBERNATION
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 0f64c898f79f..23e176e1d09d 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1840,7 +1840,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
"\n start_pfn: %lu"
"\n reserved_highatomic: %lu"
"\n free_highatomic: %lu",
- atomic_read(&pgdat->kswapd_failures) >= MAX_RECLAIM_RETRIES,
+ kswapd_test_hopeless(pgdat),
zone->zone_start_pfn,
zone->nr_reserved_highatomic,
zone->nr_free_highatomic);