diff options
author | Jiri Kosina <jkosina@suse.cz> | 2012-04-08 23:48:52 +0400 |
---|---|---|
committer | Jiri Kosina <jkosina@suse.cz> | 2012-04-08 23:48:52 +0400 |
commit | e75d660672ddd11704b7f0fdb8ff21968587b266 (patch) | |
tree | ccb9c107744c10b553c0373e450bee3971d16c00 /mm/memory-failure.c | |
parent | 61282f37927143e45b03153f3e7b48d6b702147a (diff) | |
parent | 0034102808e0dbbf3a2394b82b1bb40b5778de9e (diff) | |
download | linux-e75d660672ddd11704b7f0fdb8ff21968587b266.tar.xz |
Merge branch 'master' into for-next
Merge with latest Linus' tree, as I have incoming patches
that fix code that is newer than current HEAD of for-next.
Conflicts:
drivers/net/ethernet/realtek/r8169.c
Diffstat (limited to 'mm/memory-failure.c')
-rw-r--r-- | mm/memory-failure.c | 98 |
1 files changed, 51 insertions, 47 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 56080ea36140..97cc2733551a 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -187,33 +187,40 @@ int hwpoison_filter(struct page *p) EXPORT_SYMBOL_GPL(hwpoison_filter); /* - * Send all the processes who have the page mapped an ``action optional'' - * signal. + * Send all the processes who have the page mapped a signal. + * ``action optional'' if they are not immediately affected by the error + * ``action required'' if error happened in current execution context */ -static int kill_proc_ao(struct task_struct *t, unsigned long addr, int trapno, - unsigned long pfn, struct page *page) +static int kill_proc(struct task_struct *t, unsigned long addr, int trapno, + unsigned long pfn, struct page *page, int flags) { struct siginfo si; int ret; printk(KERN_ERR - "MCE %#lx: Killing %s:%d early due to hardware memory corruption\n", + "MCE %#lx: Killing %s:%d due to hardware memory corruption\n", pfn, t->comm, t->pid); si.si_signo = SIGBUS; si.si_errno = 0; - si.si_code = BUS_MCEERR_AO; si.si_addr = (void *)addr; #ifdef __ARCH_SI_TRAPNO si.si_trapno = trapno; #endif si.si_addr_lsb = compound_trans_order(compound_head(page)) + PAGE_SHIFT; - /* - * Don't use force here, it's convenient if the signal - * can be temporarily blocked. - * This could cause a loop when the user sets SIGBUS - * to SIG_IGN, but hopefully no one will do that? - */ - ret = send_sig_info(SIGBUS, &si, t); /* synchronous? */ + + if ((flags & MF_ACTION_REQUIRED) && t == current) { + si.si_code = BUS_MCEERR_AR; + ret = force_sig_info(SIGBUS, &si, t); + } else { + /* + * Don't use force here, it's convenient if the signal + * can be temporarily blocked. + * This could cause a loop when the user sets SIGBUS + * to SIG_IGN, but hopefully no one will do that? + */ + si.si_code = BUS_MCEERR_AO; + ret = send_sig_info(SIGBUS, &si, t); /* synchronous? */ + } if (ret < 0) printk(KERN_INFO "MCE: Error sending signal to %s:%d: %d\n", t->comm, t->pid, ret); @@ -338,8 +345,9 @@ static void add_to_kill(struct task_struct *tsk, struct page *p, * Also when FAIL is set do a force kill because something went * wrong earlier. */ -static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno, - int fail, struct page *page, unsigned long pfn) +static void kill_procs(struct list_head *to_kill, int doit, int trapno, + int fail, struct page *page, unsigned long pfn, + int flags) { struct to_kill *tk, *next; @@ -363,8 +371,8 @@ static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno, * check for that, but we need to tell the * process anyways. */ - else if (kill_proc_ao(tk->tsk, tk->addr, trapno, - pfn, page) < 0) + else if (kill_proc(tk->tsk, tk->addr, trapno, + pfn, page, flags) < 0) printk(KERN_ERR "MCE %#lx: Cannot send advisory machine check signal to %s:%d\n", pfn, tk->tsk->comm, tk->tsk->pid); @@ -844,7 +852,7 @@ static int page_action(struct page_state *ps, struct page *p, * the pages and send SIGBUS to the processes if the data was dirty. */ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, - int trapno) + int trapno, int flags) { enum ttu_flags ttu = TTU_UNMAP | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS; struct address_space *mapping; @@ -962,8 +970,8 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, * use a more force-full uncatchable kill to prevent * any accesses to the poisoned memory. */ - kill_procs_ao(&tokill, !!PageDirty(ppage), trapno, - ret != SWAP_SUCCESS, p, pfn); + kill_procs(&tokill, !!PageDirty(ppage), trapno, + ret != SWAP_SUCCESS, p, pfn, flags); return ret; } @@ -984,7 +992,25 @@ static void clear_page_hwpoison_huge_page(struct page *hpage) ClearPageHWPoison(hpage + i); } -int __memory_failure(unsigned long pfn, int trapno, int flags) +/** + * memory_failure - Handle memory failure of a page. + * @pfn: Page Number of the corrupted page + * @trapno: Trap number reported in the signal to user space. + * @flags: fine tune action taken + * + * This function is called by the low level machine check code + * of an architecture when it detects hardware memory corruption + * of a page. It tries its best to recover, which includes + * dropping pages, killing processes etc. + * + * The function is primarily of use for corruptions that + * happen outside the current execution context (e.g. when + * detected by a background scrubber) + * + * Must run in process context (e.g. a work queue) with interrupts + * enabled and no spinlocks hold. + */ +int memory_failure(unsigned long pfn, int trapno, int flags) { struct page_state *ps; struct page *p; @@ -1063,7 +1089,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags) * The check (unnecessarily) ignores LRU pages being isolated and * walked by the page reclaim code, however that's not a big loss. */ - if (!PageHuge(p) && !PageTransCompound(p)) { + if (!PageHuge(p) && !PageTransTail(p)) { if (!PageLRU(p)) shake_page(p, 0); if (!PageLRU(p)) { @@ -1130,7 +1156,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags) * Now take care of user space mappings. * Abort on fail: __delete_from_page_cache() assumes unmapped page. */ - if (hwpoison_user_mappings(p, pfn, trapno) != SWAP_SUCCESS) { + if (hwpoison_user_mappings(p, pfn, trapno, flags) != SWAP_SUCCESS) { printk(KERN_ERR "MCE %#lx: cannot unmap page, give up\n", pfn); res = -EBUSY; goto out; @@ -1156,29 +1182,7 @@ out: unlock_page(hpage); return res; } -EXPORT_SYMBOL_GPL(__memory_failure); - -/** - * memory_failure - Handle memory failure of a page. - * @pfn: Page Number of the corrupted page - * @trapno: Trap number reported in the signal to user space. - * - * This function is called by the low level machine check code - * of an architecture when it detects hardware memory corruption - * of a page. It tries its best to recover, which includes - * dropping pages, killing processes etc. - * - * The function is primarily of use for corruptions that - * happen outside the current execution context (e.g. when - * detected by a background scrubber) - * - * Must run in process context (e.g. a work queue) with interrupts - * enabled and no spinlocks hold. - */ -void memory_failure(unsigned long pfn, int trapno) -{ - __memory_failure(pfn, trapno, 0); -} +EXPORT_SYMBOL_GPL(memory_failure); #define MEMORY_FAILURE_FIFO_ORDER 4 #define MEMORY_FAILURE_FIFO_SIZE (1 << MEMORY_FAILURE_FIFO_ORDER) @@ -1251,7 +1255,7 @@ static void memory_failure_work_func(struct work_struct *work) spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); if (!gotten) break; - __memory_failure(entry.pfn, entry.trapno, entry.flags); + memory_failure(entry.pfn, entry.trapno, entry.flags); } } |