diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/huge_memory.c | 12 | ||||
-rw-r--r-- | mm/memcontrol.c | 41 |
2 files changed, 51 insertions, 2 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index bccd5a628ea6..33a5dc492810 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1481,8 +1481,18 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma, pmd = pmdp_get_and_clear(mm, old_addr, old_pmd); VM_BUG_ON(!pmd_none(*new_pmd)); set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd)); - if (new_ptl != old_ptl) + if (new_ptl != old_ptl) { + pgtable_t pgtable; + + /* + * Move preallocated PTE page table if new_pmd is on + * different PMD page table. + */ + pgtable = pgtable_trans_huge_withdraw(mm, old_pmd); + pgtable_trans_huge_deposit(mm, new_pmd, pgtable); + spin_unlock(new_ptl); + } spin_unlock(old_ptl); } out: diff --git a/mm/memcontrol.c b/mm/memcontrol.c index f1a0ae6e11b8..bf5e89457149 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2694,7 +2694,10 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, goto bypass; if (unlikely(task_in_memcg_oom(current))) - goto bypass; + goto nomem; + + if (gfp_mask & __GFP_NOFAIL) + oom = false; /* * We always charge the cgroup the mm_struct belongs to. @@ -6352,6 +6355,42 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) static void mem_cgroup_css_free(struct cgroup_subsys_state *css) { struct mem_cgroup *memcg = mem_cgroup_from_css(css); + /* + * XXX: css_offline() would be where we should reparent all + * memory to prepare the cgroup for destruction. However, + * memcg does not do css_tryget() and res_counter charging + * under the same RCU lock region, which means that charging + * could race with offlining. Offlining only happens to + * cgroups with no tasks in them but charges can show up + * without any tasks from the swapin path when the target + * memcg is looked up from the swapout record and not from the + * current task as it usually is. A race like this can leak + * charges and put pages with stale cgroup pointers into + * circulation: + * + * #0 #1 + * lookup_swap_cgroup_id() + * rcu_read_lock() + * mem_cgroup_lookup() + * css_tryget() + * rcu_read_unlock() + * disable css_tryget() + * call_rcu() + * offline_css() + * reparent_charges() + * res_counter_charge() + * css_put() + * css_free() + * pc->mem_cgroup = dead memcg + * add page to lru + * + * The bulk of the charges are still moved in offline_css() to + * avoid pinning a lot of pages in case a long-term reference + * like a swapout record is deferring the css_free() to long + * after offlining. But this makes sure we catch any charges + * made after offlining: + */ + mem_cgroup_reparent_charges(memcg); memcg_destroy_kmem(memcg); __mem_cgroup_free(memcg); |