diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-05-20 06:00:06 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-05-20 06:00:06 +0300 |
commit | a05a70db34ba24ca009e1c9cedaef26fd17d5470 (patch) | |
tree | d5d8d0c80293bed52f2103ccc56a9e09117dc983 /mm/oom_kill.c | |
parent | 03b979dd0323ace8e29a0561cd5232f73a060c09 (diff) | |
parent | 4741526b83c5d3a3d661d1896f9e7414c5730bcb (diff) | |
download | linux-a05a70db34ba24ca009e1c9cedaef26fd17d5470.tar.xz |
Merge branch 'akpm' (patches from Andrew)
Merge updates from Andrew Morton:
- fsnotify fix
- poll() timeout fix
- a few scripts/ tweaks
- debugobjects updates
- the (small) ocfs2 queue
- Minor fixes to kernel/padata.c
- Maybe half of the MM queue
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (117 commits)
mm, page_alloc: restore the original nodemask if the fast path allocation failed
mm, page_alloc: uninline the bad page part of check_new_page()
mm, page_alloc: don't duplicate code in free_pcp_prepare
mm, page_alloc: defer debugging checks of pages allocated from the PCP
mm, page_alloc: defer debugging checks of freed pages until a PCP drain
cpuset: use static key better and convert to new API
mm, page_alloc: inline pageblock lookup in page free fast paths
mm, page_alloc: remove unnecessary variable from free_pcppages_bulk
mm, page_alloc: pull out side effects from free_pages_check
mm, page_alloc: un-inline the bad part of free_pages_check
mm, page_alloc: check multiple page fields with a single branch
mm, page_alloc: remove field from alloc_context
mm, page_alloc: avoid looking up the first zone in a zonelist twice
mm, page_alloc: shortcut watermark checks for order-0 pages
mm, page_alloc: reduce cost of fair zone allocation policy retry
mm, page_alloc: shorten the page allocator fast path
mm, page_alloc: check once if a zone has isolated pageblocks
mm, page_alloc: move __GFP_HARDWALL modifications out of the fastpath
mm, page_alloc: simplify last cpupid reset
mm, page_alloc: remove unnecessary initialisation from __alloc_pages_nodemask()
...
Diffstat (limited to 'mm/oom_kill.c')
-rw-r--r-- | mm/oom_kill.c | 112 |
1 files changed, 88 insertions, 24 deletions
diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 86349586eacb..415f7eb913fa 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -412,6 +412,25 @@ bool oom_killer_disabled __read_mostly; #define K(x) ((x) << (PAGE_SHIFT-10)) +/* + * task->mm can be NULL if the task is the exited group leader. So to + * determine whether the task is using a particular mm, we examine all the + * task's threads: if one of those is using this mm then this task was also + * using it. + */ +static bool process_shares_mm(struct task_struct *p, struct mm_struct *mm) +{ + struct task_struct *t; + + for_each_thread(p, t) { + struct mm_struct *t_mm = READ_ONCE(t->mm); + if (t_mm) + return t_mm == mm; + } + return false; +} + + #ifdef CONFIG_MMU /* * OOM Reaper kernel thread which tries to reap the memory used by the OOM @@ -491,14 +510,10 @@ static bool __oom_reap_task(struct task_struct *tsk) up_read(&mm->mmap_sem); /* - * Clear TIF_MEMDIE because the task shouldn't be sitting on a - * reasonably reclaimable memory anymore. OOM killer can continue - * by selecting other victim if unmapping hasn't led to any - * improvements. This also means that selecting this task doesn't - * make any sense. + * This task can be safely ignored because we cannot do much more + * to release its memory. */ tsk->signal->oom_score_adj = OOM_SCORE_ADJ_MIN; - exit_oom_victim(tsk); out: mmput(mm); return ret; @@ -519,6 +534,15 @@ static void oom_reap_task(struct task_struct *tsk) debug_show_all_locks(); } + /* + * Clear TIF_MEMDIE because the task shouldn't be sitting on a + * reasonably reclaimable memory anymore or it is not a good candidate + * for the oom victim right now because it cannot release its memory + * itself nor by the oom reaper. + */ + tsk->oom_reaper_list = NULL; + exit_oom_victim(tsk); + /* Drop a reference taken by wake_oom_reaper */ put_task_struct(tsk); } @@ -563,6 +587,53 @@ static void wake_oom_reaper(struct task_struct *tsk) wake_up(&oom_reaper_wait); } +/* Check if we can reap the given task. This has to be called with stable + * tsk->mm + */ +void try_oom_reaper(struct task_struct *tsk) +{ + struct mm_struct *mm = tsk->mm; + struct task_struct *p; + + if (!mm) + return; + + /* + * There might be other threads/processes which are either not + * dying or even not killable. + */ + if (atomic_read(&mm->mm_users) > 1) { + rcu_read_lock(); + for_each_process(p) { + bool exiting; + + if (!process_shares_mm(p, mm)) + continue; + if (same_thread_group(p, tsk)) + continue; + if (fatal_signal_pending(p)) + continue; + + /* + * If the task is exiting make sure the whole thread group + * is exiting and cannot acces mm anymore. + */ + spin_lock_irq(&p->sighand->siglock); + exiting = signal_group_exit(p->signal); + spin_unlock_irq(&p->sighand->siglock); + if (exiting) + continue; + + /* Give up */ + rcu_read_unlock(); + return; + } + rcu_read_unlock(); + } + + wake_oom_reaper(tsk); +} + static int __init oom_init(void) { oom_reaper_th = kthread_run(oom_reaper, NULL, "oom_reaper"); @@ -653,24 +724,6 @@ void oom_killer_enable(void) } /* - * task->mm can be NULL if the task is the exited group leader. So to - * determine whether the task is using a particular mm, we examine all the - * task's threads: if one of those is using this mm then this task was also - * using it. - */ -static bool process_shares_mm(struct task_struct *p, struct mm_struct *mm) -{ - struct task_struct *t; - - for_each_thread(p, t) { - struct mm_struct *t_mm = READ_ONCE(t->mm); - if (t_mm) - return t_mm == mm; - } - return false; -} - -/* * Must be called while holding a reference to p, which will be released upon * returning. */ @@ -694,6 +747,7 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p, task_lock(p); if (p->mm && task_will_free_mem(p)) { mark_oom_victim(p); + try_oom_reaper(p); task_unlock(p); put_task_struct(p); return; @@ -873,10 +927,20 @@ bool out_of_memory(struct oom_control *oc) if (current->mm && (fatal_signal_pending(current) || task_will_free_mem(current))) { mark_oom_victim(current); + try_oom_reaper(current); return true; } /* + * The OOM killer does not compensate for IO-less reclaim. + * pagefault_out_of_memory lost its gfp context so we have to + * make sure exclude 0 mask - all other users should have at least + * ___GFP_DIRECT_RECLAIM to get here. + */ + if (oc->gfp_mask && !(oc->gfp_mask & (__GFP_FS|__GFP_NOFAIL))) + return true; + + /* * Check if there were limitations on the allocation (only relevant for * NUMA) that may require different handling. */ |