diff options
author | Yosry Ahmed <yosryahmed@google.com> | 2023-03-30 22:17:59 +0300 |
---|---|---|
committer | Andrew Morton <akpm@linux-foundation.org> | 2023-04-19 02:29:50 +0300 |
commit | 4009b2f1887036d30637bc06dd0ade7e18408bb3 (patch) | |
tree | c1d37c77b18d1669e0e279ef08abb0ae9a23b255 | |
parent | 9fad9aee1f267a8ad1f86b87ae70b2c4d6796164 (diff) | |
download | linux-4009b2f1887036d30637bc06dd0ade7e18408bb3.tar.xz |
workingset: memcg: sleep when flushing stats in workingset_refault()
In workingset_refault(), we call
mem_cgroup_flush_stats_atomic_ratelimited() to read accurate stats within
an RCU read section and with sleeping disallowed. Move the call above the
RCU read section to make it non-atomic.
Flushing is an expensive operation that scales with the number of cpus and
the number of cgroups in the system, so avoid doing it atomically where
possible.
Since workingset_refault() is the only caller of
mem_cgroup_flush_stats_atomic_ratelimited(), just make it non-atomic, and
rename it to mem_cgroup_flush_stats_ratelimited().
Link: https://lkml.kernel.org/r/20230330191801.1967435-7-yosryahmed@google.com
Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
Acked-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Josef Bacik <josef@toxicpanda.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Michal Koutný <mkoutny@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vasily Averin <vasily.averin@linux.dev>
Cc: Zefan Li <lizefan.x@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
-rw-r--r-- | include/linux/memcontrol.h | 4 | ||||
-rw-r--r-- | mm/memcontrol.c | 4 | ||||
-rw-r--r-- | mm/workingset.c | 5 |
3 files changed, 7 insertions, 6 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 3db355e6677f..222d7370134c 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1039,7 +1039,7 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
 
 void mem_cgroup_flush_stats(void);
 void mem_cgroup_flush_stats_atomic(void);
-void mem_cgroup_flush_stats_atomic_ratelimited(void);
+void mem_cgroup_flush_stats_ratelimited(void);
 
 void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
 			      int val);
@@ -1541,7 +1541,7 @@ static inline void mem_cgroup_flush_stats_atomic(void)
 {
 }
 
-static inline void mem_cgroup_flush_stats_atomic_ratelimited(void)
+static inline void mem_cgroup_flush_stats_ratelimited(void)
 {
 }
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 06786dea50b8..e9e79ceb9a11 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -674,10 +674,10 @@ void mem_cgroup_flush_stats_atomic(void)
 		do_flush_stats(true);
 }
 
-void mem_cgroup_flush_stats_atomic_ratelimited(void)
+void mem_cgroup_flush_stats_ratelimited(void)
 {
 	if (time_after64(jiffies_64, READ_ONCE(flush_next_time)))
-		mem_cgroup_flush_stats_atomic();
+		mem_cgroup_flush_stats();
 }
 
 static void flush_memcg_stats_dwork(struct work_struct *w)
diff --git a/mm/workingset.c b/mm/workingset.c
index dab0c362b9e3..3025beee9b34 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -406,6 +406,9 @@ void workingset_refault(struct folio *folio, void *shadow)
 	unpack_shadow(shadow, &memcgid, &pgdat, &eviction, &workingset);
 	eviction <<= bucket_order;
 
+	/* Flush stats (and potentially sleep) before holding RCU read lock */
+	mem_cgroup_flush_stats_ratelimited();
+
 	rcu_read_lock();
 	/*
 	 * Look up the memcg associated with the stored ID. It might
@@ -461,8 +464,6 @@ void workingset_refault(struct folio *folio, void *shadow)
 	lruvec = mem_cgroup_lruvec(memcg, pgdat);
 
 	mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file, nr);
-
-	mem_cgroup_flush_stats_atomic_ratelimited();
 	/*
 	 * Compare the distance to the existing workingset size. We
 	 * don't activate pages that couldn't stay resident even if