diff options
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- | mm/memcontrol.c | 260 |
1 files changed, 96 insertions, 164 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index f3a84c64f35c..c265212bec8c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -132,15 +132,11 @@ static const char * const mem_cgroup_lru_names[] = { * their hierarchy representation */ -struct mem_cgroup_tree_per_zone { +struct mem_cgroup_tree_per_node { struct rb_root rb_root; spinlock_t lock; }; -struct mem_cgroup_tree_per_node { - struct mem_cgroup_tree_per_zone rb_tree_per_zone[MAX_NR_ZONES]; -}; - struct mem_cgroup_tree { struct mem_cgroup_tree_per_node *rb_tree_per_node[MAX_NUMNODES]; }; @@ -323,15 +319,6 @@ EXPORT_SYMBOL(memcg_kmem_enabled_key); #endif /* !CONFIG_SLOB */ -static struct mem_cgroup_per_zone * -mem_cgroup_zone_zoneinfo(struct mem_cgroup *memcg, struct zone *zone) -{ - int nid = zone_to_nid(zone); - int zid = zone_idx(zone); - - return &memcg->nodeinfo[nid]->zoneinfo[zid]; -} - /** * mem_cgroup_css_from_page - css of the memcg associated with a page * @page: page of interest @@ -383,37 +370,35 @@ ino_t page_cgroup_ino(struct page *page) return ino; } -static struct mem_cgroup_per_zone * -mem_cgroup_page_zoneinfo(struct mem_cgroup *memcg, struct page *page) +static struct mem_cgroup_per_node * +mem_cgroup_page_nodeinfo(struct mem_cgroup *memcg, struct page *page) { int nid = page_to_nid(page); - int zid = page_zonenum(page); - return &memcg->nodeinfo[nid]->zoneinfo[zid]; + return memcg->nodeinfo[nid]; } -static struct mem_cgroup_tree_per_zone * -soft_limit_tree_node_zone(int nid, int zid) +static struct mem_cgroup_tree_per_node * +soft_limit_tree_node(int nid) { - return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid]; + return soft_limit_tree.rb_tree_per_node[nid]; } -static struct mem_cgroup_tree_per_zone * +static struct mem_cgroup_tree_per_node * soft_limit_tree_from_page(struct page *page) { int nid = page_to_nid(page); - int zid = page_zonenum(page); - return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid]; + return soft_limit_tree.rb_tree_per_node[nid]; } -static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_zone *mz, - struct mem_cgroup_tree_per_zone *mctz, +static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_node *mz, + struct mem_cgroup_tree_per_node *mctz, unsigned long new_usage_in_excess) { struct rb_node **p = &mctz->rb_root.rb_node; struct rb_node *parent = NULL; - struct mem_cgroup_per_zone *mz_node; + struct mem_cgroup_per_node *mz_node; if (mz->on_tree) return; @@ -423,7 +408,7 @@ static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_zone *mz, return; while (*p) { parent = *p; - mz_node = rb_entry(parent, struct mem_cgroup_per_zone, + mz_node = rb_entry(parent, struct mem_cgroup_per_node, tree_node); if (mz->usage_in_excess < mz_node->usage_in_excess) p = &(*p)->rb_left; @@ -439,8 +424,8 @@ static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_zone *mz, mz->on_tree = true; } -static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz, - struct mem_cgroup_tree_per_zone *mctz) +static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_node *mz, + struct mem_cgroup_tree_per_node *mctz) { if (!mz->on_tree) return; @@ -448,8 +433,8 @@ static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz, mz->on_tree = false; } -static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz, - struct mem_cgroup_tree_per_zone *mctz) +static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_node *mz, + struct mem_cgroup_tree_per_node *mctz) { unsigned long flags; @@ -473,8 +458,8 @@ static unsigned long soft_limit_excess(struct mem_cgroup *memcg) static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page) { unsigned long excess; - struct mem_cgroup_per_zone *mz; - struct mem_cgroup_tree_per_zone *mctz; + struct mem_cgroup_per_node *mz; + struct mem_cgroup_tree_per_node *mctz; mctz = soft_limit_tree_from_page(page); /* @@ -482,7 +467,7 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page) * because their event counter is not touched. */ for (; memcg; memcg = parent_mem_cgroup(memcg)) { - mz = mem_cgroup_page_zoneinfo(memcg, page); + mz = mem_cgroup_page_nodeinfo(memcg, page); excess = soft_limit_excess(memcg); /* * We have to update the tree if mz is on RB-tree or @@ -507,24 +492,22 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page) static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg) { - struct mem_cgroup_tree_per_zone *mctz; - struct mem_cgroup_per_zone *mz; - int nid, zid; + struct mem_cgroup_tree_per_node *mctz; + struct mem_cgroup_per_node *mz; + int nid; for_each_node(nid) { - for (zid = 0; zid < MAX_NR_ZONES; zid++) { - mz = &memcg->nodeinfo[nid]->zoneinfo[zid]; - mctz = soft_limit_tree_node_zone(nid, zid); - mem_cgroup_remove_exceeded(mz, mctz); - } + mz = mem_cgroup_nodeinfo(memcg, nid); + mctz = soft_limit_tree_node(nid); + mem_cgroup_remove_exceeded(mz, mctz); } } -static struct mem_cgroup_per_zone * -__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) +static struct mem_cgroup_per_node * +__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz) { struct rb_node *rightmost = NULL; - struct mem_cgroup_per_zone *mz; + struct mem_cgroup_per_node *mz; retry: mz = NULL; @@ -532,7 +515,7 @@ retry: if (!rightmost) goto done; /* Nothing to reclaim from */ - mz = rb_entry(rightmost, struct mem_cgroup_per_zone, tree_node); + mz = rb_entry(rightmost, struct mem_cgroup_per_node, tree_node); /* * Remove the node now but someone else can add it back, * we will to add it back at the end of reclaim to its correct @@ -546,10 +529,10 @@ done: return mz; } -static struct mem_cgroup_per_zone * -mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) +static struct mem_cgroup_per_node * +mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz) { - struct mem_cgroup_per_zone *mz; + struct mem_cgroup_per_node *mz; spin_lock_irq(&mctz->lock); mz = __mem_cgroup_largest_soft_limit_node(mctz); @@ -643,20 +626,16 @@ unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, int nid, unsigned int lru_mask) { unsigned long nr = 0; - int zid; + struct mem_cgroup_per_node *mz; + enum lru_list lru; VM_BUG_ON((unsigned)nid >= nr_node_ids); - for (zid = 0; zid < MAX_NR_ZONES; zid++) { - struct mem_cgroup_per_zone *mz; - enum lru_list lru; - - for_each_lru(lru) { - if (!(BIT(lru) & lru_mask)) - continue; - mz = &memcg->nodeinfo[nid]->zoneinfo[zid]; - nr += mz->lru_size[lru]; - } + for_each_lru(lru) { + if (!(BIT(lru) & lru_mask)) + continue; + mz = mem_cgroup_nodeinfo(memcg, nid); + nr += mz->lru_size[lru]; } return nr; } @@ -809,9 +788,9 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, rcu_read_lock(); if (reclaim) { - struct mem_cgroup_per_zone *mz; + struct mem_cgroup_per_node *mz; - mz = mem_cgroup_zone_zoneinfo(root, reclaim->zone); + mz = mem_cgroup_nodeinfo(root, reclaim->pgdat->node_id); iter = &mz->iter[reclaim->priority]; if (prev && reclaim->generation != iter->generation) @@ -910,19 +889,17 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg) { struct mem_cgroup *memcg = dead_memcg; struct mem_cgroup_reclaim_iter *iter; - struct mem_cgroup_per_zone *mz; - int nid, zid; + struct mem_cgroup_per_node *mz; + int nid; int i; while ((memcg = parent_mem_cgroup(memcg))) { for_each_node(nid) { - for (zid = 0; zid < MAX_NR_ZONES; zid++) { - mz = &memcg->nodeinfo[nid]->zoneinfo[zid]; - for (i = 0; i <= DEF_PRIORITY; i++) { - iter = &mz->iter[i]; - cmpxchg(&iter->position, - dead_memcg, NULL); - } + mz = mem_cgroup_nodeinfo(memcg, nid); + for (i = 0; i <= DEF_PRIORITY; i++) { + iter = &mz->iter[i]; + cmpxchg(&iter->position, + dead_memcg, NULL); } } } @@ -944,39 +921,6 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg) iter = mem_cgroup_iter(NULL, iter, NULL)) /** - * mem_cgroup_zone_lruvec - get the lru list vector for a zone and memcg - * @zone: zone of the wanted lruvec - * @memcg: memcg of the wanted lruvec - * - * Returns the lru list vector holding pages for the given @zone and - * @mem. This can be the global zone lruvec, if the memory controller - * is disabled. - */ -struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone, - struct mem_cgroup *memcg) -{ - struct mem_cgroup_per_zone *mz; - struct lruvec *lruvec; - - if (mem_cgroup_disabled()) { - lruvec = &zone->lruvec; - goto out; - } - - mz = mem_cgroup_zone_zoneinfo(memcg, zone); - lruvec = &mz->lruvec; -out: - /* - * Since a node can be onlined after the mem_cgroup was created, - * we have to be prepared to initialize lruvec->zone here; - * and if offlined then reonlined, we need to reinitialize it. - */ - if (unlikely(lruvec->zone != zone)) - lruvec->zone = zone; - return lruvec; -} - -/** * mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page * @page: the page * @zone: zone of the page @@ -985,14 +929,14 @@ out: * and putback protocol: the LRU lock must be held, and the page must * either be PageLRU() or the caller must have isolated/allocated it. */ -struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone) +struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct pglist_data *pgdat) { - struct mem_cgroup_per_zone *mz; + struct mem_cgroup_per_node *mz; struct mem_cgroup *memcg; struct lruvec *lruvec; if (mem_cgroup_disabled()) { - lruvec = &zone->lruvec; + lruvec = &pgdat->lruvec; goto out; } @@ -1004,7 +948,7 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone) if (!memcg) memcg = root_mem_cgroup; - mz = mem_cgroup_page_zoneinfo(memcg, page); + mz = mem_cgroup_page_nodeinfo(memcg, page); lruvec = &mz->lruvec; out: /* @@ -1012,8 +956,8 @@ out: * we have to be prepared to initialize lruvec->zone here; * and if offlined then reonlined, we need to reinitialize it. */ - if (unlikely(lruvec->zone != zone)) - lruvec->zone = zone; + if (unlikely(lruvec->pgdat != pgdat)) + lruvec->pgdat = pgdat; return lruvec; } @@ -1030,17 +974,15 @@ out: void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru, int nr_pages) { - struct mem_cgroup_per_zone *mz; + struct mem_cgroup_per_node *mz; unsigned long *lru_size; long size; bool empty; - __update_lru_size(lruvec, lru, nr_pages); - if (mem_cgroup_disabled()) return; - mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec); + mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec); lru_size = mz->lru_size + lru; empty = list_empty(lruvec->lists + lru); @@ -1276,9 +1218,9 @@ static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, * select it. The goal is to allow it to allocate so that it may * quickly exit and free its memory. */ - if (fatal_signal_pending(current) || task_will_free_mem(current)) { + if (task_will_free_mem(current)) { mark_oom_victim(current); - try_oom_reaper(current); + wake_oom_reaper(current); goto unlock; } @@ -1433,7 +1375,7 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg) #endif static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg, - struct zone *zone, + pg_data_t *pgdat, gfp_t gfp_mask, unsigned long *total_scanned) { @@ -1443,7 +1385,7 @@ static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg, unsigned long excess; unsigned long nr_scanned; struct mem_cgroup_reclaim_cookie reclaim = { - .zone = zone, + .pgdat = pgdat, .priority = 0, }; @@ -1473,8 +1415,8 @@ static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg, } continue; } - total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false, - zone, &nr_scanned); + total += mem_cgroup_shrink_node(victim, gfp_mask, false, + pgdat, &nr_scanned); *total_scanned += nr_scanned; if (!soft_limit_excess(root_memcg)) break; @@ -2107,11 +2049,11 @@ static void lock_page_lru(struct page *page, int *isolated) { struct zone *zone = page_zone(page); - spin_lock_irq(&zone->lru_lock); + spin_lock_irq(zone_lru_lock(zone)); if (PageLRU(page)) { struct lruvec *lruvec; - lruvec = mem_cgroup_page_lruvec(page, zone); + lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat); ClearPageLRU(page); del_page_from_lru_list(page, lruvec, page_lru(page)); *isolated = 1; @@ -2126,12 +2068,12 @@ static void unlock_page_lru(struct page *page, int isolated) if (isolated) { struct lruvec *lruvec; - lruvec = mem_cgroup_page_lruvec(page, zone); + lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat); VM_BUG_ON_PAGE(PageLRU(page), page); SetPageLRU(page); add_page_to_lru_list(page, lruvec, page_lru(page)); } - spin_unlock_irq(&zone->lru_lock); + spin_unlock_irq(zone_lru_lock(zone)); } static void commit_charge(struct page *page, struct mem_cgroup *memcg, @@ -2431,7 +2373,7 @@ void memcg_kmem_uncharge(struct page *page, int order) /* * Because tail pages are not marked as "used", set it. We're under - * zone->lru_lock and migration entries setup in all page mappings. + * zone_lru_lock and migration entries setup in all page mappings. */ void mem_cgroup_split_huge_fixup(struct page *head) { @@ -2601,22 +2543,22 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg, return ret; } -unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, +unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, gfp_t gfp_mask, unsigned long *total_scanned) { unsigned long nr_reclaimed = 0; - struct mem_cgroup_per_zone *mz, *next_mz = NULL; + struct mem_cgroup_per_node *mz, *next_mz = NULL; unsigned long reclaimed; int loop = 0; - struct mem_cgroup_tree_per_zone *mctz; + struct mem_cgroup_tree_per_node *mctz; unsigned long excess; unsigned long nr_scanned; if (order > 0) return 0; - mctz = soft_limit_tree_node_zone(zone_to_nid(zone), zone_idx(zone)); + mctz = soft_limit_tree_node(pgdat->node_id); /* * This loop can run a while, specially if mem_cgroup's continuously * keep exceeding their soft limit and putting the system under @@ -2631,7 +2573,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, break; nr_scanned = 0; - reclaimed = mem_cgroup_soft_reclaim(mz->memcg, zone, + reclaimed = mem_cgroup_soft_reclaim(mz->memcg, pgdat, gfp_mask, &nr_scanned); nr_reclaimed += reclaimed; *total_scanned += nr_scanned; @@ -3252,22 +3194,21 @@ static int memcg_stat_show(struct seq_file *m, void *v) #ifdef CONFIG_DEBUG_VM { - int nid, zid; - struct mem_cgroup_per_zone *mz; + pg_data_t *pgdat; + struct mem_cgroup_per_node *mz; struct zone_reclaim_stat *rstat; unsigned long recent_rotated[2] = {0, 0}; unsigned long recent_scanned[2] = {0, 0}; - for_each_online_node(nid) - for (zid = 0; zid < MAX_NR_ZONES; zid++) { - mz = &memcg->nodeinfo[nid]->zoneinfo[zid]; - rstat = &mz->lruvec.reclaim_stat; + for_each_online_pgdat(pgdat) { + mz = mem_cgroup_nodeinfo(memcg, pgdat->node_id); + rstat = &mz->lruvec.reclaim_stat; - recent_rotated[0] += rstat->recent_rotated[0]; - recent_rotated[1] += rstat->recent_rotated[1]; - recent_scanned[0] += rstat->recent_scanned[0]; - recent_scanned[1] += rstat->recent_scanned[1]; - } + recent_rotated[0] += rstat->recent_rotated[0]; + recent_rotated[1] += rstat->recent_rotated[1]; + recent_scanned[0] += rstat->recent_scanned[0]; + recent_scanned[1] += rstat->recent_scanned[1]; + } seq_printf(m, "recent_rotated_anon %lu\n", recent_rotated[0]); seq_printf(m, "recent_rotated_file %lu\n", recent_rotated[1]); seq_printf(m, "recent_scanned_anon %lu\n", recent_scanned[0]); @@ -4147,11 +4088,10 @@ struct mem_cgroup *mem_cgroup_from_id(unsigned short id) return idr_find(&mem_cgroup_idr, id); } -static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node) +static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node) { struct mem_cgroup_per_node *pn; - struct mem_cgroup_per_zone *mz; - int zone, tmp = node; + int tmp = node; /* * This routine is called against possible nodes. * But it's BUG to call kmalloc() against offline node. @@ -4166,18 +4106,16 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node) if (!pn) return 1; - for (zone = 0; zone < MAX_NR_ZONES; zone++) { - mz = &pn->zoneinfo[zone]; - lruvec_init(&mz->lruvec); - mz->usage_in_excess = 0; - mz->on_tree = false; - mz->memcg = memcg; - } + lruvec_init(&pn->lruvec); + pn->usage_in_excess = 0; + pn->on_tree = false; + pn->memcg = memcg; + memcg->nodeinfo[node] = pn; return 0; } -static void free_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node) +static void free_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node) { kfree(memcg->nodeinfo[node]); } @@ -4188,7 +4126,7 @@ static void mem_cgroup_free(struct mem_cgroup *memcg) memcg_wb_domain_exit(memcg); for_each_node(node) - free_mem_cgroup_per_zone_info(memcg, node); + free_mem_cgroup_per_node_info(memcg, node); free_percpu(memcg->stat); kfree(memcg); } @@ -4217,7 +4155,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void) goto fail; for_each_node(node) - if (alloc_mem_cgroup_per_zone_info(memcg, node)) + if (alloc_mem_cgroup_per_node_info(memcg, node)) goto fail; if (memcg_wb_domain_init(memcg, GFP_KERNEL)) @@ -5233,7 +5171,7 @@ static int memory_stat_show(struct seq_file *m, void *v) seq_printf(m, "file %llu\n", (u64)stat[MEM_CGROUP_STAT_CACHE] * PAGE_SIZE); seq_printf(m, "kernel_stack %llu\n", - (u64)stat[MEMCG_KERNEL_STACK] * PAGE_SIZE); + (u64)stat[MEMCG_KERNEL_STACK_KB] * 1024); seq_printf(m, "slab %llu\n", (u64)(stat[MEMCG_SLAB_RECLAIMABLE] + stat[MEMCG_SLAB_UNRECLAIMABLE]) * PAGE_SIZE); @@ -5820,18 +5758,12 @@ static int __init mem_cgroup_init(void) for_each_node(node) { struct mem_cgroup_tree_per_node *rtpn; - int zone; rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, node_online(node) ? node : NUMA_NO_NODE); - for (zone = 0; zone < MAX_NR_ZONES; zone++) { - struct mem_cgroup_tree_per_zone *rtpz; - - rtpz = &rtpn->rb_tree_per_zone[zone]; - rtpz->rb_root = RB_ROOT; - spin_lock_init(&rtpz->lock); - } + rtpn->rb_root = RB_ROOT; + spin_lock_init(&rtpn->lock); soft_limit_tree.rb_tree_per_node[node] = rtpn; } |