diff options
Diffstat (limited to 'mm/memcontrol-v1.c')
-rw-r--r-- | mm/memcontrol-v1.c | 133 |
1 files changed, 117 insertions, 16 deletions
diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c index a071fa43d479..4b94731305b9 100644 --- a/mm/memcontrol-v1.c +++ b/mm/memcontrol-v1.c @@ -490,6 +490,19 @@ static void mem_cgroup_threshold(struct mem_cgroup *memcg) } /* Cgroup1: threshold notifications & softlimit tree updates */ + +/* + * Per memcg event counter is incremented at every pagein/pageout. With THP, + * it will be incremented by the number of pages. This counter is used + * to trigger some periodic events. This is straightforward and better + * than using jiffies etc. to handle periodic memcg event. + */ +enum mem_cgroup_events_target { + MEM_CGROUP_TARGET_THRESH, + MEM_CGROUP_TARGET_SOFTLIMIT, + MEM_CGROUP_NTARGETS, +}; + struct memcg1_events_percpu { unsigned long nr_page_events; unsigned long targets[MEM_CGROUP_NTARGETS]; @@ -499,9 +512,9 @@ static void memcg1_charge_statistics(struct mem_cgroup *memcg, int nr_pages) { /* pagein of a big page is an event. So, ignore page size */ if (nr_pages > 0) - __count_memcg_events(memcg, PGPGIN, 1); + count_memcg_events(memcg, PGPGIN, 1); else { - __count_memcg_events(memcg, PGPGOUT, 1); + count_memcg_events(memcg, PGPGOUT, 1); nr_pages = -nr_pages; /* for event */ } @@ -568,8 +581,59 @@ void memcg1_commit_charge(struct folio *folio, struct mem_cgroup *memcg) local_irq_restore(flags); } -void memcg1_swapout(struct folio *folio, struct mem_cgroup *memcg) +/** + * memcg1_swapout - transfer a memsw charge to swap + * @folio: folio whose memsw charge to transfer + * @entry: swap entry to move the charge to + * + * Transfer the memsw charge of @folio to @entry. + */ +void memcg1_swapout(struct folio *folio, swp_entry_t entry) { + struct mem_cgroup *memcg, *swap_memcg; + unsigned int nr_entries; + + VM_BUG_ON_FOLIO(folio_test_lru(folio), folio); + VM_BUG_ON_FOLIO(folio_ref_count(folio), folio); + + if (mem_cgroup_disabled()) + return; + + if (!do_memsw_account()) + return; + + memcg = folio_memcg(folio); + + VM_WARN_ON_ONCE_FOLIO(!memcg, folio); + if (!memcg) + return; + + /* + * In case the memcg owning these pages has been offlined and doesn't + * have an ID allocated to it anymore, charge the closest online + * ancestor for the swap instead and transfer the memory+swap charge. + */ + swap_memcg = mem_cgroup_id_get_online(memcg); + nr_entries = folio_nr_pages(folio); + /* Get references for the tail pages, too */ + if (nr_entries > 1) + mem_cgroup_id_get_many(swap_memcg, nr_entries - 1); + mod_memcg_state(swap_memcg, MEMCG_SWAP, nr_entries); + + swap_cgroup_record(folio, mem_cgroup_id(swap_memcg), entry); + + folio_unqueue_deferred_split(folio); + folio->memcg_data = 0; + + if (!mem_cgroup_is_root(memcg)) + page_counter_uncharge(&memcg->memory, nr_entries); + + if (memcg != swap_memcg) { + if (!mem_cgroup_is_root(swap_memcg)) + page_counter_charge(&swap_memcg->memsw, nr_entries); + page_counter_uncharge(&memcg->memsw, nr_entries); + } + /* * Interrupts should be disabled here because the caller holds the * i_pages lock which is taken with interrupts-off. It is @@ -581,6 +645,42 @@ void memcg1_swapout(struct folio *folio, struct mem_cgroup *memcg) memcg1_charge_statistics(memcg, -folio_nr_pages(folio)); preempt_enable_nested(); memcg1_check_events(memcg, folio_nid(folio)); + + css_put(&memcg->css); +} + +/* + * memcg1_swapin - uncharge swap slot + * @entry: the first swap entry for which the pages are charged + * @nr_pages: number of pages which will be uncharged + * + * Call this function after successfully adding the charged page to swapcache. + * + * Note: This function assumes the page for which swap slot is being uncharged + * is order 0 page. + */ +void memcg1_swapin(swp_entry_t entry, unsigned int nr_pages) +{ + /* + * Cgroup1's unified memory+swap counter has been charged with the + * new swapcache page, finish the transfer by uncharging the swap + * slot. The swap slot would also get uncharged when it dies, but + * it can stick around indefinitely and we'd count the page twice + * the entire time. + * + * Cgroup2 has separate resource counters for memory and swap, + * so this is a non-issue here. Memory and swap charge lifetimes + * correspond 1:1 to page and swap slot lifetimes: we charge the + * page to memory here, and uncharge swap when the slot is freed. + */ + if (do_memsw_account()) { + /* + * The swap entry might not get freed for a long time, + * let's not wait for it. The page already received a + * memory+swap charge, drop the swap entry duplicate. + */ + mem_cgroup_uncharge_swap(entry, nr_pages); + } } void memcg1_uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout, @@ -589,7 +689,7 @@ void memcg1_uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout, unsigned long flags; local_irq_save(flags); - __count_memcg_events(memcg, PGPGOUT, pgpgout); + count_memcg_events(memcg, PGPGOUT, pgpgout); __this_cpu_add(memcg->events_percpu->nr_page_events, nr_memory); memcg1_check_events(memcg, nid); local_irq_restore(flags); @@ -899,7 +999,7 @@ static void memcg_event_remove(struct work_struct *work) * * Called with wqh->lock held and interrupts disabled. */ -static int memcg_event_wake(wait_queue_entry_t *wait, unsigned mode, +static int memcg_event_wake(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key) { struct mem_cgroup_event *event = @@ -1040,13 +1140,13 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, } else if (!strcmp(name, "memory.oom_control")) { pr_warn_once("oom_control is deprecated and will be removed. " "Please report your usecase to linux-mm-@kvack.org" - " if you depend on this functionality. \n"); + " if you depend on this functionality.\n"); event->register_event = mem_cgroup_oom_register_event; event->unregister_event = mem_cgroup_oom_unregister_event; } else if (!strcmp(name, "memory.pressure_level")) { pr_warn_once("pressure_level is deprecated and will be removed. " "Please report your usecase to linux-mm-@kvack.org " - "if you depend on this functionality. \n"); + "if you depend on this functionality.\n"); event->register_event = vmpressure_register_event; event->unregister_event = vmpressure_unregister_event; } else if (!strcmp(name, "memory.memsw.usage_in_bytes")) { @@ -1134,8 +1234,8 @@ static bool mem_cgroup_oom_trylock(struct mem_cgroup *memcg) failed = iter; mem_cgroup_iter_break(memcg, iter); break; - } else - iter->oom_lock = true; + } + iter->oom_lock = true; } if (failed) { @@ -1202,7 +1302,7 @@ struct oom_wait_info { }; static int memcg_oom_wake_function(wait_queue_entry_t *wait, - unsigned mode, int sync, void *arg) + unsigned int mode, int sync, void *arg) { struct mem_cgroup *wake_memcg = (struct mem_cgroup *)arg; struct mem_cgroup *oom_wait_memcg; @@ -1644,7 +1744,7 @@ static unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, unsigned long nr = 0; enum lru_list lru; - VM_BUG_ON((unsigned)nid >= nr_node_ids); + VM_BUG_ON((unsigned int)nid >= nr_node_ids); for_each_lru(lru) { if (!(BIT(lru) & lru_mask)) @@ -1855,9 +1955,11 @@ static int mem_cgroup_swappiness_write(struct cgroup_subsys_state *css, if (val > MAX_SWAPPINESS) return -EINVAL; - if (!mem_cgroup_is_root(memcg)) + if (!mem_cgroup_is_root(memcg)) { + pr_info_once("Per memcg swappiness does not exist in cgroup v2. " + "See memory.reclaim or memory.swap.max there\n "); WRITE_ONCE(memcg->swappiness, val); - else + } else WRITE_ONCE(vm_swappiness, val); return 0; @@ -1881,7 +1983,7 @@ static int mem_cgroup_oom_control_write(struct cgroup_subsys_state *css, pr_warn_once("oom_control is deprecated and will be removed. " "Please report your usecase to linux-mm-@kvack.org if you " - "depend on this functionality. \n"); + "depend on this functionality.\n"); /* cannot set to root cgroup and only 0 and 1 are allowed */ if (mem_cgroup_is_root(memcg) || !((val == 0) || (val == 1))) @@ -2096,8 +2198,7 @@ bool memcg1_alloc_events(struct mem_cgroup *memcg) void memcg1_free_events(struct mem_cgroup *memcg) { - if (memcg->events_percpu) - free_percpu(memcg->events_percpu); + free_percpu(memcg->events_percpu); } static int __init memcg1_init(void) |