summary | refs | log | tree | commit | diff
path: root/mm/oom_kill.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2016-05-20 06:00:06 +0300
committer: Linus Torvalds <torvalds@linux-foundation.org> 2016-05-20 06:00:06 +0300
commit: a05a70db34ba24ca009e1c9cedaef26fd17d5470 (patch)
tree: d5d8d0c80293bed52f2103ccc56a9e09117dc983 /mm/oom_kill.c
parent: 03b979dd0323ace8e29a0561cd5232f73a060c09 (diff)
parent: 4741526b83c5d3a3d661d1896f9e7414c5730bcb (diff)
download: linux-a05a70db34ba24ca009e1c9cedaef26fd17d5470.tar.xz
Merge branch 'akpm' (patches from Andrew)
Merge updates from Andrew Morton:

 - fsnotify fix
 - poll() timeout fix
 - a few scripts/ tweaks
 - debugobjects updates
 - the (small) ocfs2 queue
 - Minor fixes to kernel/padata.c
 - Maybe half of the MM queue

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (117 commits)
  mm, page_alloc: restore the original nodemask if the fast path allocation failed
  mm, page_alloc: uninline the bad page part of check_new_page()
  mm, page_alloc: don't duplicate code in free_pcp_prepare
  mm, page_alloc: defer debugging checks of pages allocated from the PCP
  mm, page_alloc: defer debugging checks of freed pages until a PCP drain
  cpuset: use static key better and convert to new API
  mm, page_alloc: inline pageblock lookup in page free fast paths
  mm, page_alloc: remove unnecessary variable from free_pcppages_bulk
  mm, page_alloc: pull out side effects from free_pages_check
  mm, page_alloc: un-inline the bad part of free_pages_check
  mm, page_alloc: check multiple page fields with a single branch
  mm, page_alloc: remove field from alloc_context
  mm, page_alloc: avoid looking up the first zone in a zonelist twice
  mm, page_alloc: shortcut watermark checks for order-0 pages
  mm, page_alloc: reduce cost of fair zone allocation policy retry
  mm, page_alloc: shorten the page allocator fast path
  mm, page_alloc: check once if a zone has isolated pageblocks
  mm, page_alloc: move __GFP_HARDWALL modifications out of the fastpath
  mm, page_alloc: simplify last cpupid reset
  mm, page_alloc: remove unnecessary initialisation from __alloc_pages_nodemask()
  ...
Diffstat (limited to 'mm/oom_kill.c')
-rw-r--r-- mm/oom_kill.c 112
1 files changed, 88 insertions, 24 deletions
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 86349586eacb..415f7eb913fa 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -412,6 +412,25 @@ bool oom_killer_disabled __read_mostly;
#define K(x) ((x) << (PAGE_SHIFT-10))
+/*
+ * task->mm can be NULL if the task is the exited group leader. So to
+ * determine whether the task is using a particular mm, we examine all the
+ * task's threads: if one of those is using this mm then this task was also
+ * using it.
+ */
+static bool process_shares_mm(struct task_struct *p, struct mm_struct *mm)
+{
+ struct task_struct *t;
+
+ for_each_thread(p, t) {
+ struct mm_struct *t_mm = READ_ONCE(t->mm);
+ if (t_mm)
+ return t_mm == mm;
+ }
+ return false;
+}
+
+
#ifdef CONFIG_MMU
/*
* OOM Reaper kernel thread which tries to reap the memory used by the OOM
@@ -491,14 +510,10 @@ static bool __oom_reap_task(struct task_struct *tsk)
up_read(&mm->mmap_sem);
/*
- * Clear TIF_MEMDIE because the task shouldn't be sitting on a
- * reasonably reclaimable memory anymore. OOM killer can continue
- * by selecting other victim if unmapping hasn't led to any
- * improvements. This also means that selecting this task doesn't
- * make any sense.
+ * This task can be safely ignored because we cannot do much more
+ * to release its memory.
*/
tsk->signal->oom_score_adj = OOM_SCORE_ADJ_MIN;
- exit_oom_victim(tsk);
out:
mmput(mm);
return ret;
@@ -519,6 +534,15 @@ static void oom_reap_task(struct task_struct *tsk)
debug_show_all_locks();
}
+ /*
+ * Clear TIF_MEMDIE because the task shouldn't be sitting on a
+ * reasonably reclaimable memory anymore or it is not a good candidate
+ * for the oom victim right now because it cannot release its memory
+ * itself nor by the oom reaper.
+ */
+ tsk->oom_reaper_list = NULL;
+ exit_oom_victim(tsk);
+
/* Drop a reference taken by wake_oom_reaper */
put_task_struct(tsk);
}
@@ -563,6 +587,53 @@ static void wake_oom_reaper(struct task_struct *tsk)
wake_up(&oom_reaper_wait);
}
+/* Check if we can reap the given task. This has to be called with a stable
+ * tsk->mm
+ */
+void try_oom_reaper(struct task_struct *tsk)
+{
+ struct mm_struct *mm = tsk->mm;
+ struct task_struct *p;
+
+ if (!mm)
+ return;
+
+ /*
+ * There might be other threads/processes which are either not
+ * dying or even not killable.
+ */
+ if (atomic_read(&mm->mm_users) > 1) {
+ rcu_read_lock();
+ for_each_process(p) {
+ bool exiting;
+
+ if (!process_shares_mm(p, mm))
+ continue;
+ if (same_thread_group(p, tsk))
+ continue;
+ if (fatal_signal_pending(p))
+ continue;
+
+ /*
+ * If the task is exiting make sure the whole thread group
+ * is exiting and cannot access mm anymore.
+ */
+ spin_lock_irq(&p->sighand->siglock);
+ exiting = signal_group_exit(p->signal);
+ spin_unlock_irq(&p->sighand->siglock);
+ if (exiting)
+ continue;
+
+ /* Give up */
+ rcu_read_unlock();
+ return;
+ }
+ rcu_read_unlock();
+ }
+
+ wake_oom_reaper(tsk);
+}
+
static int __init oom_init(void)
{
oom_reaper_th = kthread_run(oom_reaper, NULL, "oom_reaper");
@@ -653,24 +724,6 @@ void oom_killer_enable(void)
}
/*
- * task->mm can be NULL if the task is the exited group leader. So to
- * determine whether the task is using a particular mm, we examine all the
- * task's threads: if one of those is using this mm then this task was also
- * using it.
- */
-static bool process_shares_mm(struct task_struct *p, struct mm_struct *mm)
-{
- struct task_struct *t;
-
- for_each_thread(p, t) {
- struct mm_struct *t_mm = READ_ONCE(t->mm);
- if (t_mm)
- return t_mm == mm;
- }
- return false;
-}
-
-/*
* Must be called while holding a reference to p, which will be released upon
* returning.
*/
@@ -694,6 +747,7 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p,
task_lock(p);
if (p->mm && task_will_free_mem(p)) {
mark_oom_victim(p);
+ try_oom_reaper(p);
task_unlock(p);
put_task_struct(p);
return;
@@ -873,10 +927,20 @@ bool out_of_memory(struct oom_control *oc)
if (current->mm &&
(fatal_signal_pending(current) || task_will_free_mem(current))) {
mark_oom_victim(current);
+ try_oom_reaper(current);
return true;
}
/*
+ * The OOM killer does not compensate for IO-less reclaim.
+ * pagefault_out_of_memory lost its gfp context so we have to
+ * make sure to exclude 0 mask - all other users should have at least
+ * ___GFP_DIRECT_RECLAIM to get here.
+ */
+ if (oc->gfp_mask && !(oc->gfp_mask & (__GFP_FS|__GFP_NOFAIL)))
+ return true;
+
+ /*
* Check if there were limitations on the allocation (only relevant for
* NUMA) that may require different handling.
*/