summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/huge_memory.c12
-rw-r--r--mm/memcontrol.c41
2 files changed, 51 insertions, 2 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index bccd5a628ea6..33a5dc492810 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1481,8 +1481,18 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
pmd = pmdp_get_and_clear(mm, old_addr, old_pmd);
VM_BUG_ON(!pmd_none(*new_pmd));
set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd));
- if (new_ptl != old_ptl)
+ if (new_ptl != old_ptl) {
+ pgtable_t pgtable;
+
+ /*
+ * Move preallocated PTE page table if new_pmd is on
+ * different PMD page table.
+ */
+ pgtable = pgtable_trans_huge_withdraw(mm, old_pmd);
+ pgtable_trans_huge_deposit(mm, new_pmd, pgtable);
+
spin_unlock(new_ptl);
+ }
spin_unlock(old_ptl);
}
out:
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f1a0ae6e11b8..bf5e89457149 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2694,7 +2694,10 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
goto bypass;
if (unlikely(task_in_memcg_oom(current)))
- goto bypass;
+ goto nomem;
+
+ if (gfp_mask & __GFP_NOFAIL)
+ oom = false;
/*
* We always charge the cgroup the mm_struct belongs to.
@@ -6352,6 +6355,42 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+ /*
+ * XXX: css_offline() would be where we should reparent all
+ * memory to prepare the cgroup for destruction. However,
+ * memcg does not do css_tryget() and res_counter charging
+ * under the same RCU lock region, which means that charging
+ * could race with offlining. Offlining only happens to
+ * cgroups with no tasks in them but charges can show up
+ * without any tasks from the swapin path when the target
+ * memcg is looked up from the swapout record and not from the
+ * current task as it usually is. A race like this can leak
+ * charges and put pages with stale cgroup pointers into
+ * circulation:
+ *
+ * #0 #1
+ * lookup_swap_cgroup_id()
+ * rcu_read_lock()
+ * mem_cgroup_lookup()
+ * css_tryget()
+ * rcu_read_unlock()
+ * disable css_tryget()
+ * call_rcu()
+ * offline_css()
+ * reparent_charges()
+ * res_counter_charge()
+ * css_put()
+ * css_free()
+ * pc->mem_cgroup = dead memcg
+ * add page to lru
+ *
+ * The bulk of the charges are still moved in offline_css() to
+ * avoid pinning a lot of pages in case a long-term reference
+ * like a swapout record is deferring the css_free() to long
+ * after offlining. But this makes sure we catch any charges
+ * made after offlining:
+ */
+ mem_cgroup_reparent_charges(memcg);
memcg_destroy_kmem(memcg);
__mem_cgroup_free(memcg);