diff options
author | Tony Lindgren <tony@atomide.com> | 2016-03-30 20:36:06 +0300 |
---|---|---|
committer | Tony Lindgren <tony@atomide.com> | 2016-03-30 20:36:06 +0300 |
commit | 1809de7e7d37c585e01a1bcc583ea92b78fc759d (patch) | |
tree | 76c5b35c2b04eafce86a1a729c02ab705eba44bc /mm/memcontrol.c | |
parent | ebf24414809200915b9ddf7f109bba7c278c8210 (diff) | |
parent | 3ca4a238106dedc285193ee47f494a6584b6fd2f (diff) | |
download | linux-1809de7e7d37c585e01a1bcc583ea92b78fc759d.tar.xz |
Merge tag 'for-v4.6-rc/omap-fixes-a' of git://git.kernel.org/pub/scm/linux/kernel/git/pjw/omap-pending into omap-for-v4.6/fixes
ARM: OMAP2+: first hwmod fix for v4.6-rc
Fix a longstanding bug in the hwmod code that could cause
hardware SYSCONFIG register values to not match the kernel's
idea of what they should be, and that could result in lower
performance during IP block idle entry.
Basic build, boot, and PM test logs are available here:
http://www.pwsan.com/omap/testlogs/omap-hwmod-fixes-a-for-v4.6-rc/20160326231727/
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- | mm/memcontrol.c | 283 |
1 files changed, 136 insertions, 147 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d06cae2de783..36db05fa8acb 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -268,31 +268,6 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg) return (memcg == root_mem_cgroup); } -/* - * We restrict the id in the range of [1, 65535], so it can fit into - * an unsigned short. - */ -#define MEM_CGROUP_ID_MAX USHRT_MAX - -static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg) -{ - return memcg->css.id; -} - -/* - * A helper function to get mem_cgroup from ID. must be called under - * rcu_read_lock(). The caller is responsible for calling - * css_tryget_online() if the mem_cgroup is used for charging. (dropping - * refcnt from swap can be called against removed memcg.) - */ -static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id) -{ - struct cgroup_subsys_state *css; - - css = css_from_id(id, &memory_cgrp_subsys); - return mem_cgroup_from_css(css); -} - #ifndef CONFIG_SLOB /* * This will be the memcg's index in each cache's ->memcg_params.memcg_caches. @@ -663,9 +638,8 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg, __this_cpu_add(memcg->stat->nr_page_events, nr_pages); } -static unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, - int nid, - unsigned int lru_mask) +unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, + int nid, unsigned int lru_mask) { unsigned long nr = 0; int zid; @@ -1176,12 +1150,9 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg) */ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) { - /* oom_info_lock ensures that parallel ooms do not interleave */ - static DEFINE_MUTEX(oom_info_lock); struct mem_cgroup *iter; unsigned int i; - mutex_lock(&oom_info_lock); rcu_read_lock(); if (p) { @@ -1225,7 +1196,6 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) pr_cont("\n"); } - mutex_unlock(&oom_info_lock); } /* @@ -1262,7 +1232,7 @@ static unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg) return limit; } -static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, +static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, int order) { struct oom_control oc = { @@ -1340,6 +1310,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, } unlock: mutex_unlock(&oom_lock); + return chosen; } #if MAX_NUMNODES > 1 @@ -1709,19 +1680,13 @@ cleanup: } /** - * mem_cgroup_begin_page_stat - begin a page state statistics transaction - * @page: page that is going to change accounted state - * - * This function must mark the beginning of an accounted page state - * change to prevent double accounting when the page is concurrently - * being moved to another memcg: + * lock_page_memcg - lock a page->mem_cgroup binding + * @page: the page * - * memcg = mem_cgroup_begin_page_stat(page); - * if (TestClearPageState(page)) - * mem_cgroup_update_page_stat(memcg, state, -1); - * mem_cgroup_end_page_stat(memcg); + * This function protects unlocked LRU pages from being moved to + * another cgroup and stabilizes their page->mem_cgroup binding. */ -struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page) +void lock_page_memcg(struct page *page) { struct mem_cgroup *memcg; unsigned long flags; @@ -1730,25 +1695,18 @@ struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page) * The RCU lock is held throughout the transaction. The fast * path can get away without acquiring the memcg->move_lock * because page moving starts with an RCU grace period. - * - * The RCU lock also protects the memcg from being freed when - * the page state that is going to change is the only thing - * preventing the page from being uncharged. - * E.g. end-writeback clearing PageWriteback(), which allows - * migration to go ahead and uncharge the page before the - * account transaction might be complete. */ rcu_read_lock(); if (mem_cgroup_disabled()) - return NULL; + return; again: memcg = page->mem_cgroup; if (unlikely(!memcg)) - return NULL; + return; if (atomic_read(&memcg->moving_account) <= 0) - return memcg; + return; spin_lock_irqsave(&memcg->move_lock, flags); if (memcg != page->mem_cgroup) { @@ -1759,21 +1717,23 @@ again: /* * When charge migration first begins, we can have locked and * unlocked page stat updates happening concurrently. Track - * the task who has the lock for mem_cgroup_end_page_stat(). + * the task who has the lock for unlock_page_memcg(). */ memcg->move_lock_task = current; memcg->move_lock_flags = flags; - return memcg; + return; } -EXPORT_SYMBOL(mem_cgroup_begin_page_stat); +EXPORT_SYMBOL(lock_page_memcg); /** - * mem_cgroup_end_page_stat - finish a page state statistics transaction - * @memcg: the memcg that was accounted against + * unlock_page_memcg - unlock a page->mem_cgroup binding + * @page: the page */ -void mem_cgroup_end_page_stat(struct mem_cgroup *memcg) +void unlock_page_memcg(struct page *page) { + struct mem_cgroup *memcg = page->mem_cgroup; + if (memcg && memcg->move_lock_task == current) { unsigned long flags = memcg->move_lock_flags; @@ -1785,7 +1745,7 @@ void mem_cgroup_end_page_stat(struct mem_cgroup *memcg) rcu_read_unlock(); } -EXPORT_SYMBOL(mem_cgroup_end_page_stat); +EXPORT_SYMBOL(unlock_page_memcg); /* * size of first charge trial. "32" comes from vmscan.c's magic value. @@ -2361,9 +2321,6 @@ int __memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order, struct page_counter *counter; int ret; - if (!memcg_kmem_online(memcg)) - return 0; - ret = try_charge(memcg, gfp, nr_pages); if (ret) return ret; @@ -2382,10 +2339,11 @@ int __memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order, int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order) { struct mem_cgroup *memcg; - int ret; + int ret = 0; memcg = get_mem_cgroup_from_mm(current->mm); - ret = __memcg_kmem_charge_memcg(page, gfp, order, memcg); + if (!mem_cgroup_is_root(memcg)) + ret = __memcg_kmem_charge_memcg(page, gfp, order, memcg); css_put(&memcg->css); return ret; } @@ -2755,39 +2713,48 @@ static int mem_cgroup_hierarchy_write(struct cgroup_subsys_state *css, return retval; } -static unsigned long tree_stat(struct mem_cgroup *memcg, - enum mem_cgroup_stat_index idx) +static void tree_stat(struct mem_cgroup *memcg, unsigned long *stat) { struct mem_cgroup *iter; - unsigned long val = 0; + int i; - for_each_mem_cgroup_tree(iter, memcg) - val += mem_cgroup_read_stat(iter, idx); + memset(stat, 0, sizeof(*stat) * MEMCG_NR_STAT); - return val; + for_each_mem_cgroup_tree(iter, memcg) { + for (i = 0; i < MEMCG_NR_STAT; i++) + stat[i] += mem_cgroup_read_stat(iter, i); + } } -static unsigned long tree_events(struct mem_cgroup *memcg, - enum mem_cgroup_events_index idx) +static void tree_events(struct mem_cgroup *memcg, unsigned long *events) { struct mem_cgroup *iter; - unsigned long val = 0; + int i; - for_each_mem_cgroup_tree(iter, memcg) - val += mem_cgroup_read_events(iter, idx); + memset(events, 0, sizeof(*events) * MEMCG_NR_EVENTS); - return val; + for_each_mem_cgroup_tree(iter, memcg) { + for (i = 0; i < MEMCG_NR_EVENTS; i++) + events[i] += mem_cgroup_read_events(iter, i); + } } static unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap) { - unsigned long val; + unsigned long val = 0; if (mem_cgroup_is_root(memcg)) { - val = tree_stat(memcg, MEM_CGROUP_STAT_CACHE); - val += tree_stat(memcg, MEM_CGROUP_STAT_RSS); - if (swap) - val += tree_stat(memcg, MEM_CGROUP_STAT_SWAP); + struct mem_cgroup *iter; + + for_each_mem_cgroup_tree(iter, memcg) { + val += mem_cgroup_read_stat(iter, + MEM_CGROUP_STAT_CACHE); + val += mem_cgroup_read_stat(iter, + MEM_CGROUP_STAT_RSS); + if (swap) + val += mem_cgroup_read_stat(iter, + MEM_CGROUP_STAT_SWAP); + } } else { if (!swap) val = page_counter_read(&memcg->memory); @@ -2853,6 +2820,9 @@ static int memcg_online_kmem(struct mem_cgroup *memcg) { int memcg_id; + if (cgroup_memory_nokmem) + return 0; + BUG_ON(memcg->kmemcg_id >= 0); BUG_ON(memcg->kmem_state); @@ -2873,24 +2843,6 @@ static int memcg_online_kmem(struct mem_cgroup *memcg) return 0; } -static int memcg_propagate_kmem(struct mem_cgroup *parent, - struct mem_cgroup *memcg) -{ - int ret = 0; - - mutex_lock(&memcg_limit_mutex); - /* - * If the parent cgroup is not kmem-online now, it cannot be - * onlined after this point, because it has at least one child - * already. - */ - if (memcg_kmem_online(parent) || - (cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_nokmem)) - ret = memcg_online_kmem(memcg); - mutex_unlock(&memcg_limit_mutex); - return ret; -} - static void memcg_offline_kmem(struct mem_cgroup *memcg) { struct cgroup_subsys_state *css; @@ -2949,10 +2901,6 @@ static void memcg_free_kmem(struct mem_cgroup *memcg) } } #else -static int memcg_propagate_kmem(struct mem_cgroup *parent, struct mem_cgroup *memcg) -{ - return 0; -} static int memcg_online_kmem(struct mem_cgroup *memcg) { return 0; @@ -2968,22 +2916,10 @@ static void memcg_free_kmem(struct mem_cgroup *memcg) static int memcg_update_kmem_limit(struct mem_cgroup *memcg, unsigned long limit) { - int ret = 0; + int ret; mutex_lock(&memcg_limit_mutex); - /* Top-level cgroup doesn't propagate from root */ - if (!memcg_kmem_online(memcg)) { - if (cgroup_is_populated(memcg->css.cgroup) || - (memcg->use_hierarchy && memcg_has_children(memcg))) - ret = -EBUSY; - if (ret) - goto out; - ret = memcg_online_kmem(memcg); - if (ret) - goto out; - } ret = page_counter_limit(&memcg->kmem, limit); -out: mutex_unlock(&memcg_limit_mutex); return ret; } @@ -4234,7 +4170,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) return &memcg->css; } - error = memcg_propagate_kmem(parent, memcg); + error = memcg_online_kmem(memcg); if (error) goto fail; @@ -4318,9 +4254,11 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); - mem_cgroup_resize_limit(memcg, PAGE_COUNTER_MAX); - mem_cgroup_resize_memsw_limit(memcg, PAGE_COUNTER_MAX); - memcg_update_kmem_limit(memcg, PAGE_COUNTER_MAX); + page_counter_limit(&memcg->memory, PAGE_COUNTER_MAX); + page_counter_limit(&memcg->swap, PAGE_COUNTER_MAX); + page_counter_limit(&memcg->memsw, PAGE_COUNTER_MAX); + page_counter_limit(&memcg->kmem, PAGE_COUNTER_MAX); + page_counter_limit(&memcg->tcpmem, PAGE_COUNTER_MAX); memcg->low = 0; memcg->high = PAGE_COUNTER_MAX; memcg->soft_limit = PAGE_COUNTER_MAX; @@ -4488,7 +4426,7 @@ static int mem_cgroup_move_account(struct page *page, VM_BUG_ON(compound && !PageTransHuge(page)); /* - * Prevent mem_cgroup_replace_page() from looking at + * Prevent mem_cgroup_migrate() from looking at * page->mem_cgroup of its source page while we change it. */ ret = -EBUSY; @@ -4923,9 +4861,9 @@ static void mem_cgroup_move_charge(struct mm_struct *mm) lru_add_drain_all(); /* - * Signal mem_cgroup_begin_page_stat() to take the memcg's - * move_lock while we're moving its pages to another memcg. - * Then wait for already started RCU-only updates to finish. + * Signal lock_page_memcg() to take the memcg's move_lock + * while we're moving its pages to another memcg. Then wait + * for already started RCU-only updates to finish. */ atomic_inc(&mc.from->moving_account); synchronize_rcu(); @@ -5051,6 +4989,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); + unsigned long nr_pages; unsigned long high; int err; @@ -5061,6 +5000,11 @@ static ssize_t memory_high_write(struct kernfs_open_file *of, memcg->high = high; + nr_pages = page_counter_read(&memcg->memory); + if (nr_pages > high) + try_to_free_mem_cgroup_pages(memcg, nr_pages - high, + GFP_KERNEL, true); + memcg_wb_domain_size_changed(memcg); return nbytes; } @@ -5082,6 +5026,8 @@ static ssize_t memory_max_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); + unsigned int nr_reclaims = MEM_CGROUP_RECLAIM_RETRIES; + bool drained = false; unsigned long max; int err; @@ -5090,9 +5036,36 @@ static ssize_t memory_max_write(struct kernfs_open_file *of, if (err) return err; - err = mem_cgroup_resize_limit(memcg, max); - if (err) - return err; + xchg(&memcg->memory.limit, max); + + for (;;) { + unsigned long nr_pages = page_counter_read(&memcg->memory); + + if (nr_pages <= max) + break; + + if (signal_pending(current)) { + err = -EINTR; + break; + } + + if (!drained) { + drain_all_stock(memcg); + drained = true; + continue; + } + + if (nr_reclaims) { + if (!try_to_free_mem_cgroup_pages(memcg, nr_pages - max, + GFP_KERNEL, true)) + nr_reclaims--; + continue; + } + + mem_cgroup_events(memcg, MEMCG_OOM, 1); + if (!mem_cgroup_out_of_memory(memcg, GFP_KERNEL, 0)) + break; + } memcg_wb_domain_size_changed(memcg); return nbytes; @@ -5113,6 +5086,8 @@ static int memory_events_show(struct seq_file *m, void *v) static int memory_stat_show(struct seq_file *m, void *v) { struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); + unsigned long stat[MEMCG_NR_STAT]; + unsigned long events[MEMCG_NR_EVENTS]; int i; /* @@ -5126,22 +5101,27 @@ static int memory_stat_show(struct seq_file *m, void *v) * Current memory state: */ + tree_stat(memcg, stat); + tree_events(memcg, events); + seq_printf(m, "anon %llu\n", - (u64)tree_stat(memcg, MEM_CGROUP_STAT_RSS) * PAGE_SIZE); + (u64)stat[MEM_CGROUP_STAT_RSS] * PAGE_SIZE); seq_printf(m, "file %llu\n", - (u64)tree_stat(memcg, MEM_CGROUP_STAT_CACHE) * PAGE_SIZE); + (u64)stat[MEM_CGROUP_STAT_CACHE] * PAGE_SIZE); + seq_printf(m, "kernel_stack %llu\n", + (u64)stat[MEMCG_KERNEL_STACK] * PAGE_SIZE); + seq_printf(m, "slab %llu\n", + (u64)(stat[MEMCG_SLAB_RECLAIMABLE] + + stat[MEMCG_SLAB_UNRECLAIMABLE]) * PAGE_SIZE); seq_printf(m, "sock %llu\n", - (u64)tree_stat(memcg, MEMCG_SOCK) * PAGE_SIZE); + (u64)stat[MEMCG_SOCK] * PAGE_SIZE); seq_printf(m, "file_mapped %llu\n", - (u64)tree_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED) * - PAGE_SIZE); + (u64)stat[MEM_CGROUP_STAT_FILE_MAPPED] * PAGE_SIZE); seq_printf(m, "file_dirty %llu\n", - (u64)tree_stat(memcg, MEM_CGROUP_STAT_DIRTY) * - PAGE_SIZE); + (u64)stat[MEM_CGROUP_STAT_DIRTY] * PAGE_SIZE); seq_printf(m, "file_writeback %llu\n", - (u64)tree_stat(memcg, MEM_CGROUP_STAT_WRITEBACK) * - PAGE_SIZE); + (u64)stat[MEM_CGROUP_STAT_WRITEBACK] * PAGE_SIZE); for (i = 0; i < NR_LRU_LISTS; i++) { struct mem_cgroup *mi; @@ -5153,12 +5133,17 @@ static int memory_stat_show(struct seq_file *m, void *v) mem_cgroup_lru_names[i], (u64)val * PAGE_SIZE); } + seq_printf(m, "slab_reclaimable %llu\n", + (u64)stat[MEMCG_SLAB_RECLAIMABLE] * PAGE_SIZE); + seq_printf(m, "slab_unreclaimable %llu\n", + (u64)stat[MEMCG_SLAB_UNRECLAIMABLE] * PAGE_SIZE); + /* Accumulated memory events */ seq_printf(m, "pgfault %lu\n", - tree_events(memcg, MEM_CGROUP_EVENTS_PGFAULT)); + events[MEM_CGROUP_EVENTS_PGFAULT]); seq_printf(m, "pgmajfault %lu\n", - tree_events(memcg, MEM_CGROUP_EVENTS_PGMAJFAULT)); + events[MEM_CGROUP_EVENTS_PGMAJFAULT]); return 0; } @@ -5431,6 +5416,10 @@ static void uncharge_list(struct list_head *page_list) struct list_head *next; struct page *page; + /* + * Note that the list can be a single page->lru; hence the + * do-while loop instead of a simple list_for_each_entry(). + */ next = page_list->next; do { unsigned int nr_pages = 1; @@ -5517,16 +5506,16 @@ void mem_cgroup_uncharge_list(struct list_head *page_list) } /** - * mem_cgroup_replace_page - migrate a charge to another page - * @oldpage: currently charged page - * @newpage: page to transfer the charge to + * mem_cgroup_migrate - charge a page's replacement + * @oldpage: currently circulating page + * @newpage: replacement page * - * Migrate the charge from @oldpage to @newpage. + * Charge @newpage as a replacement page for @oldpage. @oldpage will + * be uncharged upon free. * * Both pages must be locked, @newpage->mapping must be set up. - * Either or both pages might be on the LRU already. */ -void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage) +void mem_cgroup_migrate(struct page *oldpage, struct page *newpage) { struct mem_cgroup *memcg; unsigned int nr_pages; @@ -5559,7 +5548,7 @@ void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage) page_counter_charge(&memcg->memsw, nr_pages); css_get_many(&memcg->css, nr_pages); - commit_charge(newpage, memcg, true); + commit_charge(newpage, memcg, false); local_irq_disable(); mem_cgroup_charge_statistics(memcg, newpage, compound, nr_pages); |