Diffstat (limited to 'mm/memcontrol.c')
 mm/memcontrol.c | 453 ++++++++++++++++++-----------------------------------
 1 file changed, 169 insertions(+), 284 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1f14a430c656..90dc501eaf3f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2551,55 +2551,72 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
return NOTIFY_OK;
}
-
-/* See mem_cgroup_try_charge() for details */
-enum {
- CHARGE_OK, /* success */
- CHARGE_RETRY, /* need to retry but retry is not bad */
- CHARGE_NOMEM, /* we can't do more. return -ENOMEM */
- CHARGE_WOULDBLOCK, /* GFP_WAIT wasn't set and no enough res. */
-};
-
-static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
- unsigned int nr_pages, unsigned int min_pages,
- bool invoke_oom)
+/**
+ * mem_cgroup_try_charge - try charging a memcg
+ * @memcg: memcg to charge
+ * @gfp_mask: reclaim mode
+ * @nr_pages: number of pages to charge
+ *
+ * Returns 0 if @memcg was charged successfully, -EINTR if the charge
+ * was bypassed to root_mem_cgroup, and -ENOMEM if the charge failed.
+ */
+static int mem_cgroup_try_charge(struct mem_cgroup *memcg,
+ gfp_t gfp_mask,
+ unsigned int nr_pages)
{
- unsigned long csize = nr_pages * PAGE_SIZE;
+ unsigned int batch = max(CHARGE_BATCH, nr_pages);
+ int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
struct mem_cgroup *mem_over_limit;
struct res_counter *fail_res;
+ unsigned long nr_reclaimed;
unsigned long flags = 0;
- int ret;
+ unsigned long long size;
+ int ret = 0;
- ret = res_counter_charge(&memcg->res, csize, &fail_res);
+retry:
+ if (consume_stock(memcg, nr_pages))
+ goto done;
- if (likely(!ret)) {
+ size = batch * PAGE_SIZE;
+ if (!res_counter_charge(&memcg->res, size, &fail_res)) {
if (!do_swap_account)
- return CHARGE_OK;
- ret = res_counter_charge(&memcg->memsw, csize, &fail_res);
- if (likely(!ret))
- return CHARGE_OK;
-
- res_counter_uncharge(&memcg->res, csize);
+ goto done_restock;
+ if (!res_counter_charge(&memcg->memsw, size, &fail_res))
+ goto done_restock;
+ res_counter_uncharge(&memcg->res, size);
mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw);
flags |= MEM_CGROUP_RECLAIM_NOSWAP;
} else
mem_over_limit = mem_cgroup_from_res_counter(fail_res, res);
+
+ if (batch > nr_pages) {
+ batch = nr_pages;
+ goto retry;
+ }
+
/*
- * Never reclaim on behalf of optional batching, retry with a
- * single page instead.
+ * Unlike in global OOM situations, memcg is not in a physical
+ * memory shortage. Allow dying and OOM-killed tasks to
+ * bypass the last charges so that they can exit quickly and
+ * free their memory.
*/
- if (nr_pages > min_pages)
- return CHARGE_RETRY;
+ if (unlikely(test_thread_flag(TIF_MEMDIE) ||
+ fatal_signal_pending(current) ||
+ current->flags & PF_EXITING))
+ goto bypass;
+
+ if (unlikely(task_in_memcg_oom(current)))
+ goto nomem;
if (!(gfp_mask & __GFP_WAIT))
- return CHARGE_WOULDBLOCK;
+ goto nomem;
- if (gfp_mask & __GFP_NORETRY)
- return CHARGE_NOMEM;
+ nr_reclaimed = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);
- ret = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);
if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
- return CHARGE_RETRY;
+ goto retry;
+
+ if (gfp_mask & __GFP_NORETRY)
+ goto nomem;
/*
* Even though the limit is exceeded at this point, reclaim
* may have been able to free some pages. Retry the charge
@@ -2609,96 +2626,38 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
* unlikely to succeed so close to the limit, and we fall back
* to regular pages anyway in case of failure.
*/
- if (nr_pages <= (1 << PAGE_ALLOC_COSTLY_ORDER) && ret)
- return CHARGE_RETRY;
-
+ if (nr_reclaimed && nr_pages <= (1 << PAGE_ALLOC_COSTLY_ORDER))
+ goto retry;
/*
* At task move, charge accounts can be doubly counted. So, it's
* better to wait until the end of task_move if something is going on.
*/
if (mem_cgroup_wait_acct_move(mem_over_limit))
- return CHARGE_RETRY;
-
- if (invoke_oom)
- mem_cgroup_oom(mem_over_limit, gfp_mask, get_order(csize));
-
- return CHARGE_NOMEM;
-}
-
-/**
- * mem_cgroup_try_charge - try charging a memcg
- * @memcg: memcg to charge
- * @nr_pages: number of pages to charge
- * @oom: trigger OOM if reclaim fails
- *
- * Returns 0 if @memcg was charged successfully, -EINTR if the charge
- * was bypassed to root_mem_cgroup, and -ENOMEM if the charge failed.
- */
-static int mem_cgroup_try_charge(struct mem_cgroup *memcg,
- gfp_t gfp_mask,
- unsigned int nr_pages,
- bool oom)
-{
- unsigned int batch = max(CHARGE_BATCH, nr_pages);
- int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES;
- int ret;
-
- if (mem_cgroup_is_root(memcg))
- goto done;
- /*
- * Unlike in global OOM situations, memcg is not in a physical
- * memory shortage. Allow dying and OOM-killed tasks to
- * bypass the last charges so that they can exit quickly and
- * free their memory.
- */
- if (unlikely(test_thread_flag(TIF_MEMDIE) ||
- fatal_signal_pending(current) ||
- current->flags & PF_EXITING))
- goto bypass;
+ goto retry;
- if (unlikely(task_in_memcg_oom(current)))
- goto nomem;
+ if (nr_retries--)
+ goto retry;
if (gfp_mask & __GFP_NOFAIL)
- oom = false;
-again:
- if (consume_stock(memcg, nr_pages))
- goto done;
-
- do {
- bool invoke_oom = oom && !nr_oom_retries;
-
- /* If killed, bypass charge */
- if (fatal_signal_pending(current))
- goto bypass;
+ goto bypass;
- ret = mem_cgroup_do_charge(memcg, gfp_mask, batch,
- nr_pages, invoke_oom);
- switch (ret) {
- case CHARGE_OK:
- break;
- case CHARGE_RETRY: /* not in OOM situation but retry */
- batch = nr_pages;
- goto again;
- case CHARGE_WOULDBLOCK: /* !__GFP_WAIT */
- goto nomem;
- case CHARGE_NOMEM: /* OOM routine works */
- if (!oom || invoke_oom)
- goto nomem;
- nr_oom_retries--;
- break;
- }
- } while (ret != CHARGE_OK);
+ if (fatal_signal_pending(current))
+ goto bypass;
- if (batch > nr_pages)
- refill_stock(memcg, batch - nr_pages);
-done:
- return 0;
+ mem_cgroup_oom(mem_over_limit, gfp_mask, get_order(nr_pages * PAGE_SIZE));
nomem:
if (!(gfp_mask & __GFP_NOFAIL))
return -ENOMEM;
bypass:
- return -EINTR;
+ memcg = root_mem_cgroup;
+ ret = -EINTR;
+ goto retry;
+
+done_restock:
+ if (batch > nr_pages)
+ refill_stock(memcg, batch - nr_pages);
+done:
+ return ret;
}
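
The rewrite preserves the function's three-way return contract: 0 on success, -EINTR when the charge was bypassed to root_mem_cgroup, and -ENOMEM on failure. A minimal caller-side sketch (hypothetical charge_example(), modeled on mem_cgroup_try_charge_mm() below; not part of this patch):

	/*
	 * Sketch only: how a caller consumes the 0/-EINTR/-ENOMEM
	 * contract of mem_cgroup_try_charge().
	 */
	static struct mem_cgroup *charge_example(struct mm_struct *mm,
						 gfp_t gfp_mask,
						 unsigned int nr_pages)
	{
		struct mem_cgroup *memcg = get_mem_cgroup_from_mm(mm);
		int ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages);

		css_put(&memcg->css);
		if (ret == -EINTR)
			memcg = root_mem_cgroup;	/* charge bypassed to root */
		else if (ret)
			memcg = NULL;			/* -ENOMEM */
		return memcg;
	}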
/**
@@ -2712,15 +2671,14 @@ bypass:
*/
static struct mem_cgroup *mem_cgroup_try_charge_mm(struct mm_struct *mm,
gfp_t gfp_mask,
- unsigned int nr_pages,
- bool oom)
+ unsigned int nr_pages)
{
struct mem_cgroup *memcg;
int ret;
memcg = get_mem_cgroup_from_mm(mm);
- ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages, oom);
+ ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages);
css_put(&memcg->css);
if (ret == -EINTR)
memcg = root_mem_cgroup;
@@ -2738,13 +2696,11 @@ static struct mem_cgroup *mem_cgroup_try_charge_mm(struct mm_struct *mm,
static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg,
unsigned int nr_pages)
{
- if (!mem_cgroup_is_root(memcg)) {
- unsigned long bytes = nr_pages * PAGE_SIZE;
+ unsigned long bytes = nr_pages * PAGE_SIZE;
- res_counter_uncharge(&memcg->res, bytes);
- if (do_swap_account)
- res_counter_uncharge(&memcg->memsw, bytes);
- }
+ res_counter_uncharge(&memcg->res, bytes);
+ if (do_swap_account)
+ res_counter_uncharge(&memcg->memsw, bytes);
}
/*
@@ -2756,9 +2712,6 @@ static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg,
{
unsigned long bytes = nr_pages * PAGE_SIZE;
- if (mem_cgroup_is_root(memcg))
- return;
-
res_counter_uncharge_until(&memcg->res, memcg->res.parent, bytes);
if (do_swap_account)
res_counter_uncharge_until(&memcg->memsw,
@@ -2842,14 +2795,6 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
}
pc->mem_cgroup = memcg;
- /*
- * We access a page_cgroup asynchronously without lock_page_cgroup().
- * Especially when a page_cgroup is taken from a page, pc->mem_cgroup
- * is accessed after testing USED bit. To make pc->mem_cgroup visible
- * before USED bit, we need memory barrier here.
- * See mem_cgroup_add_lru_list(), etc.
- */
- smp_wmb();
SetPageCgroupUsed(pc);
if (lrucare) {
@@ -2937,8 +2882,7 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
if (ret)
return ret;
- ret = mem_cgroup_try_charge(memcg, gfp, size >> PAGE_SHIFT,
- oom_gfp_allowed(gfp));
+ ret = mem_cgroup_try_charge(memcg, gfp, size >> PAGE_SHIFT);
if (ret == -EINTR) {
/*
* mem_cgroup_try_charge() chose to bypass to root due to
@@ -3463,12 +3407,13 @@ void __memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg,
memcg_uncharge_kmem(memcg, PAGE_SIZE << order);
return;
}
-
+ /*
+ * The page is freshly allocated and not visible to any
+ * outside callers yet. Set up pc non-atomically.
+ */
pc = lookup_page_cgroup(page);
- lock_page_cgroup(pc);
pc->mem_cgroup = memcg;
- SetPageCgroupUsed(pc);
- unlock_page_cgroup(pc);
+ pc->flags = PCG_USED;
}
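
Dropping lock_page_cgroup() here relies on the page being freshly allocated: no other context can reach it yet, so plain stores suffice. A hedged illustration of the invariant (not patch code):

	/*
	 * Illustration: before the page is published (mapped, put on
	 * the LRU, or otherwise made reachable), these stores race
	 * with nobody, so the old lock/Set/unlock sequence reduces to:
	 */
	pc->mem_cgroup = memcg;
	pc->flags = PCG_USED;	/* readers can only appear after publication */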
void __memcg_kmem_uncharge_pages(struct page *page, int order)
@@ -3478,19 +3423,11 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order)
pc = lookup_page_cgroup(page);
- /*
- * Fast unlocked return. Theoretically might have changed, have to
- * check again after locking.
- */
if (!PageCgroupUsed(pc))
return;
- lock_page_cgroup(pc);
- if (PageCgroupUsed(pc)) {
- memcg = pc->mem_cgroup;
- ClearPageCgroupUsed(pc);
- }
- unlock_page_cgroup(pc);
+ memcg = pc->mem_cgroup;
+ pc->flags = 0;
/*
* We trust that only if there is a memcg associated with the page, it
@@ -3531,7 +3468,6 @@ void mem_cgroup_split_huge_fixup(struct page *head)
for (i = 1; i < HPAGE_PMD_NR; i++) {
pc = head_pc + i;
pc->mem_cgroup = memcg;
- smp_wmb();/* see __commit_charge() */
pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
}
__this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE],
@@ -3687,7 +3623,6 @@ int mem_cgroup_charge_anon(struct page *page,
{
unsigned int nr_pages = 1;
struct mem_cgroup *memcg;
- bool oom = true;
if (mem_cgroup_disabled())
return 0;
@@ -3699,14 +3634,9 @@ int mem_cgroup_charge_anon(struct page *page,
if (PageTransHuge(page)) {
nr_pages <<= compound_order(page);
VM_BUG_ON_PAGE(!PageTransHuge(page), page);
- /*
- * Never OOM-kill a process for a huge page. The
- * fault handler will fall back to regular pages.
- */
- oom = false;
}
- memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, nr_pages, oom);
+ memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, nr_pages);
if (!memcg)
return -ENOMEM;
__mem_cgroup_commit_charge(memcg, page, nr_pages,
@@ -3743,7 +3673,7 @@ static int __mem_cgroup_try_charge_swapin(struct mm_struct *mm,
memcg = try_get_mem_cgroup_from_page(page);
if (!memcg)
memcg = get_mem_cgroup_from_mm(mm);
- ret = mem_cgroup_try_charge(memcg, mask, 1, true);
+ ret = mem_cgroup_try_charge(memcg, mask, 1);
css_put(&memcg->css);
if (ret == -EINTR)
memcg = root_mem_cgroup;
@@ -3770,7 +3700,7 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page,
if (!PageSwapCache(page)) {
struct mem_cgroup *memcg;
- memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true);
+ memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1);
if (!memcg)
return -ENOMEM;
*memcgp = memcg;
@@ -3839,7 +3769,7 @@ int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm,
return 0;
}
- memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true);
+ memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1);
if (!memcg)
return -ENOMEM;
__mem_cgroup_commit_charge(memcg, page, 1, type, false);
@@ -3993,7 +3923,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype,
* replacement page, so leave it alone when phasing out the
* page that is unused after the migration.
*/
- if (!end_migration && !mem_cgroup_is_root(memcg))
+ if (!end_migration)
mem_cgroup_do_uncharge(memcg, nr_pages, ctype);
return memcg;
@@ -4126,8 +4056,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent)
* We uncharge this because swap is freed. This memcg can
* be obsolete one. We avoid calling css_tryget_online().
*/
- if (!mem_cgroup_is_root(memcg))
- res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
+ res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
mem_cgroup_swap_statistics(memcg, false);
css_put(&memcg->css);
}
@@ -4817,78 +4746,24 @@ out:
return retval;
}
-
-static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *memcg,
- enum mem_cgroup_stat_index idx)
-{
- struct mem_cgroup *iter;
- long val = 0;
-
- /* Per-cpu values can be negative, use a signed accumulator */
- for_each_mem_cgroup_tree(iter, memcg)
- val += mem_cgroup_read_stat(iter, idx);
-
- if (val < 0) /* race ? */
- val = 0;
- return val;
-}
-
-static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
-{
- u64 val;
-
- if (!mem_cgroup_is_root(memcg)) {
- if (!swap)
- return res_counter_read_u64(&memcg->res, RES_USAGE);
- else
- return res_counter_read_u64(&memcg->memsw, RES_USAGE);
- }
-
- /*
- * Transparent hugepages are still accounted for in MEM_CGROUP_STAT_RSS
- * as well as in MEM_CGROUP_STAT_RSS_HUGE.
- */
- val = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE);
- val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_RSS);
-
- if (swap)
- val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAP);
-
- return val << PAGE_SHIFT;
-}
-
static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
- struct cftype *cft)
+ struct cftype *cft)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
- u64 val;
- int name;
- enum res_type type;
-
- type = MEMFILE_TYPE(cft->private);
- name = MEMFILE_ATTR(cft->private);
+ enum res_type type = MEMFILE_TYPE(cft->private);
+ int name = MEMFILE_ATTR(cft->private);
switch (type) {
case _MEM:
- if (name == RES_USAGE)
- val = mem_cgroup_usage(memcg, false);
- else
- val = res_counter_read_u64(&memcg->res, name);
- break;
+ return res_counter_read_u64(&memcg->res, name);
case _MEMSWAP:
- if (name == RES_USAGE)
- val = mem_cgroup_usage(memcg, true);
- else
- val = res_counter_read_u64(&memcg->memsw, name);
- break;
+ return res_counter_read_u64(&memcg->memsw, name);
case _KMEM:
- val = res_counter_read_u64(&memcg->kmem, name);
+ return res_counter_read_u64(&memcg->kmem, name);
break;
default:
BUG();
}
-
- return val;
}
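
The RES_USAGE special case for root could go away because root_mem_cgroup now has real res_counters that parent every child (see the res_counter_init() hunk further down): charges propagate up the counter hierarchy, so the counter at any level, root included, already covers all descendants. A conceptual sketch of why the recursive per-cpu summation became unnecessary (simplified from res_counter semantics):

	/*
	 * res_counter_charge() walks the parent chain, so after this
	 * patch every charge effectively does:
	 *
	 *	for (c = &memcg->res; c; c = c->parent)
	 *		c->usage += nr_pages * PAGE_SIZE;
	 *
	 * making usage a direct counter read:
	 */
	u64 usage = res_counter_read_u64(&memcg->res, RES_USAGE);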
#ifdef CONFIG_MEMCG_KMEM
@@ -5350,7 +5225,10 @@ static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap)
if (!t)
goto unlock;
- usage = mem_cgroup_usage(memcg, swap);
+ if (!swap)
+ usage = res_counter_read_u64(&memcg->res, RES_USAGE);
+ else
+ usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
/*
* current_threshold points to threshold just below or equal to usage.
@@ -5446,15 +5324,15 @@ static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg,
mutex_lock(&memcg->thresholds_lock);
- if (type == _MEM)
+ if (type == _MEM) {
thresholds = &memcg->thresholds;
- else if (type == _MEMSWAP)
+ usage = res_counter_read_u64(&memcg->res, RES_USAGE);
+ } else if (type == _MEMSWAP) {
thresholds = &memcg->memsw_thresholds;
- else
+ usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
+ } else
BUG();
- usage = mem_cgroup_usage(memcg, type == _MEMSWAP);
-
/* Check if a threshold crossed before adding a new one */
if (thresholds->primary)
__mem_cgroup_threshold(memcg, type == _MEMSWAP);
@@ -5534,18 +5412,19 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
int i, j, size;
mutex_lock(&memcg->thresholds_lock);
- if (type == _MEM)
+
+ if (type == _MEM) {
thresholds = &memcg->thresholds;
- else if (type == _MEMSWAP)
+ usage = res_counter_read_u64(&memcg->res, RES_USAGE);
+ } else if (type == _MEMSWAP) {
thresholds = &memcg->memsw_thresholds;
- else
+ usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
+ } else
BUG();
if (!thresholds->primary)
goto unlock;
- usage = mem_cgroup_usage(memcg, type == _MEMSWAP);
-
/* Check if a threshold crossed before removing */
__mem_cgroup_threshold(memcg, type == _MEMSWAP);
@@ -6007,7 +5886,6 @@ static struct cftype mem_cgroup_files[] = {
},
{
.name = "use_hierarchy",
- .flags = CFTYPE_INSANE,
.write_u64 = mem_cgroup_hierarchy_write,
.read_u64 = mem_cgroup_hierarchy_read,
},
@@ -6300,9 +6178,9 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
* core guarantees its existence.
*/
} else {
- res_counter_init(&memcg->res, NULL);
- res_counter_init(&memcg->memsw, NULL);
- res_counter_init(&memcg->kmem, NULL);
+ res_counter_init(&memcg->res, &root_mem_cgroup->res);
+ res_counter_init(&memcg->memsw, &root_mem_cgroup->memsw);
+ res_counter_init(&memcg->kmem, &root_mem_cgroup->kmem);
/*
* Deeper hierarchy with use_hierarchy == false doesn't make
* much sense so let cgroup subsystem know about this
@@ -6411,57 +6289,64 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
__mem_cgroup_free(memcg);
}
+/**
+ * mem_cgroup_css_reset - reset the states of a mem_cgroup
+ * @css: the target css
+ *
+ * Reset the states of the mem_cgroup associated with @css. This is
+ * invoked when the userland requests disabling on the default hierarchy
+ * but the memcg is pinned through dependency. The memcg should stop
+ * applying policies and should revert to the vanilla state as it may be
+ * made visible again.
+ *
+ * The current implementation only resets the essential configurations.
+ * This needs to be expanded to cover all the visible parts.
+ */
+static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+
+ mem_cgroup_resize_limit(memcg, ULLONG_MAX);
+ mem_cgroup_resize_memsw_limit(memcg, ULLONG_MAX);
+ memcg_update_kmem_limit(memcg, ULLONG_MAX);
+ res_counter_set_soft_limit(&memcg->res, ULLONG_MAX);
+}
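+
The reset must leave the group behaving as if it were never configured, since the css may be made visible again. An illustration of the resulting state (hedged; the file names are the corresponding v1 knobs, shown for reference only):

	/*
	 * After the reset above, effectively:
	 *
	 *	memory.limit_in_bytes        == unlimited (ULLONG_MAX)
	 *	memory.memsw.limit_in_bytes  == unlimited
	 *	memory.kmem.limit_in_bytes   == unlimited
	 *	memory.soft_limit_in_bytes   == unlimited
	 */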
+
#ifdef CONFIG_MMU
/* Handlers for move charge at task migration. */
-#define PRECHARGE_COUNT_AT_ONCE 256
static int mem_cgroup_do_precharge(unsigned long count)
{
- int ret = 0;
- int batch_count = PRECHARGE_COUNT_AT_ONCE;
- struct mem_cgroup *memcg = mc.to;
+ int ret;
- if (mem_cgroup_is_root(memcg)) {
+ /* Try a single bulk charge without reclaim first */
+ ret = mem_cgroup_try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, count);
+ if (!ret) {
mc.precharge += count;
- /* we don't need css_get for root */
return ret;
}
- /* try to charge at once */
- if (count > 1) {
- struct res_counter *dummy;
- /*
- * "memcg" cannot be under rmdir() because we've already checked
- * by cgroup_lock_live_cgroup() that it is not removed and we
- * are still under the same cgroup_mutex. So we can postpone
- * css_get().
- */
- if (res_counter_charge(&memcg->res, PAGE_SIZE * count, &dummy))
- goto one_by_one;
- if (do_swap_account && res_counter_charge(&memcg->memsw,
- PAGE_SIZE * count, &dummy)) {
- res_counter_uncharge(&memcg->res, PAGE_SIZE * count);
- goto one_by_one;
- }
- mc.precharge += count;
+ if (ret == -EINTR) {
+ __mem_cgroup_cancel_charge(root_mem_cgroup, count);
return ret;
}
-one_by_one:
- /* fall back to one by one charge */
+
+ /* Try charges one by one with reclaim */
while (count--) {
- if (signal_pending(current)) {
- ret = -EINTR;
- break;
- }
- if (!batch_count--) {
- batch_count = PRECHARGE_COUNT_AT_ONCE;
- cond_resched();
- }
- ret = mem_cgroup_try_charge(memcg, GFP_KERNEL, 1, false);
+ ret = mem_cgroup_try_charge(mc.to,
+ GFP_KERNEL & ~__GFP_NORETRY, 1);
+ /*
+ * In case of failure, any residual charges against
+ * mc.to will be dropped by mem_cgroup_clear_mc()
+ * later on. However, cancel any charges that are
+ * bypassed to root right away or they'll be lost.
+ */
+ if (ret == -EINTR)
+ __mem_cgroup_cancel_charge(root_mem_cgroup, 1);
if (ret)
- /* mem_cgroup_clear_mc() will do uncharge later */
return ret;
mc.precharge++;
+ cond_resched();
}
- return ret;
+ return 0;
}
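
The new precharge logic is a two-phase pattern: one optimistic bulk charge with reclaim disabled (__GFP_WAIT cleared), then a careful per-page loop that is allowed to reclaim. A generic sketch of the pattern (try_bulk()/try_one() are illustrative pseudo-helpers, not kernel API):

	/*
	 * try_bulk(): cheap, fails fast, never reclaims.
	 * try_one():  slow path, may reclaim and retry.
	 */
	if (!try_bulk(n))
		return 0;
	while (n--)
		if (try_one())
			return -ENOMEM;
	return 0;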
/**
@@ -6738,21 +6623,18 @@ static void __mem_cgroup_clear_mc(void)
/* we must fixup refcnts and charges */
if (mc.moved_swap) {
/* uncharge swap account from the old cgroup */
- if (!mem_cgroup_is_root(mc.from))
- res_counter_uncharge(&mc.from->memsw,
- PAGE_SIZE * mc.moved_swap);
+ res_counter_uncharge(&mc.from->memsw,
+ PAGE_SIZE * mc.moved_swap);
for (i = 0; i < mc.moved_swap; i++)
css_put(&mc.from->css);
- if (!mem_cgroup_is_root(mc.to)) {
- /*
- * we charged both to->res and to->memsw, so we should
- * uncharge to->res.
- */
- res_counter_uncharge(&mc.to->res,
- PAGE_SIZE * mc.moved_swap);
- }
+ /*
+ * we charged both to->res and to->memsw, so we should
+ * uncharge to->res.
+ */
+ res_counter_uncharge(&mc.to->res,
+ PAGE_SIZE * mc.moved_swap);
/* we've already done css_get(mc.to) */
mc.moved_swap = 0;
}
@@ -7005,16 +6887,17 @@ static void mem_cgroup_move_task(struct cgroup_subsys_state *css,
/*
* Cgroup retains root cgroups across [un]mount cycles making it necessary
- * to verify sane_behavior flag on each mount attempt.
+ * to verify whether we're attached to the default hierarchy on each mount
+ * attempt.
*/
static void mem_cgroup_bind(struct cgroup_subsys_state *root_css)
{
/*
- * use_hierarchy is forced with sane_behavior. cgroup core
+ * use_hierarchy is forced on the default hierarchy. cgroup core
* guarantees that @root doesn't have any children, so turning it
* on for the root memcg is enough.
*/
- if (cgroup_sane_behavior(root_css->cgroup))
+ if (cgroup_on_dfl(root_css->cgroup))
mem_cgroup_from_css(root_css)->use_hierarchy = true;
}
@@ -7023,11 +6906,12 @@ struct cgroup_subsys memory_cgrp_subsys = {
.css_online = mem_cgroup_css_online,
.css_offline = mem_cgroup_css_offline,
.css_free = mem_cgroup_css_free,
+ .css_reset = mem_cgroup_css_reset,
.can_attach = mem_cgroup_can_attach,
.cancel_attach = mem_cgroup_cancel_attach,
.attach = mem_cgroup_move_task,
.bind = mem_cgroup_bind,
- .base_cftypes = mem_cgroup_files,
+ .legacy_cftypes = mem_cgroup_files,
.early_init = 0,
};
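
Background on the cftype change (not part of this hunk): base_cftypes applied one file set to every hierarchy; its split into legacy_cftypes and dfl_cftypes lets a controller expose different interfaces on cgroup v1 and the default hierarchy. mem_cgroup_files is v1-only, hence legacy_cftypes here and cgroup_add_legacy_cftypes() in memsw_file_init() below. Sketch (field names are real; memory_files_dfl is a hypothetical example):

	/*
	 * A controller with split file sets would declare:
	 *
	 *	.dfl_cftypes    = memory_files_dfl,	// default hierarchy
	 *	.legacy_cftypes = mem_cgroup_files,	// v1 hierarchies
	 */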
@@ -7044,7 +6928,8 @@ __setup("swapaccount=", enable_swap_account);
static void __init memsw_file_init(void)
{
- WARN_ON(cgroup_add_cftypes(&memory_cgrp_subsys, memsw_cgroup_files));
+ WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys,
+ memsw_cgroup_files));
}
static void __init enable_swap_cgroup(void)