diff options
Diffstat (limited to 'mm/memory-failure.c')
-rw-r--r-- | mm/memory-failure.c | 206 |
1 files changed, 115 insertions, 91 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index d84c5e5331bb..b7c171602ba1 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -206,7 +206,7 @@ static int kill_proc(struct task_struct *t, unsigned long addr, int trapno, #ifdef __ARCH_SI_TRAPNO si.si_trapno = trapno; #endif - si.si_addr_lsb = compound_trans_order(compound_head(page)) + PAGE_SHIFT; + si.si_addr_lsb = compound_order(compound_head(page)) + PAGE_SHIFT; if ((flags & MF_ACTION_REQUIRED) && t == current) { si.si_code = BUS_MCEERR_AR; @@ -248,10 +248,12 @@ void shake_page(struct page *p, int access) */ if (access) { int nr; + int nid = page_to_nid(p); do { struct shrink_control shrink = { .gfp_mask = GFP_KERNEL, }; + node_set(nid, shrink.nodes_to_scan); nr = shrink_slab(&shrink, 1000, 1000); if (page_count(p) == 1) @@ -983,7 +985,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, static void set_page_hwpoison_huge_page(struct page *hpage) { int i; - int nr_pages = 1 << compound_trans_order(hpage); + int nr_pages = 1 << compound_order(hpage); for (i = 0; i < nr_pages; i++) SetPageHWPoison(hpage + i); } @@ -991,7 +993,7 @@ static void set_page_hwpoison_huge_page(struct page *hpage) static void clear_page_hwpoison_huge_page(struct page *hpage) { int i; - int nr_pages = 1 << compound_trans_order(hpage); + int nr_pages = 1 << compound_order(hpage); for (i = 0; i < nr_pages; i++) ClearPageHWPoison(hpage + i); } @@ -1112,8 +1114,10 @@ int memory_failure(unsigned long pfn, int trapno, int flags) * shake_page could have turned it free. */ if (is_free_buddy_page(p)) { - action_result(pfn, "free buddy, 2nd try", - DELAYED); + if (flags & MF_COUNT_INCREASED) + action_result(pfn, "free buddy", DELAYED); + else + action_result(pfn, "free buddy, 2nd try", DELAYED); return 0; } action_result(pfn, "non LRU", IGNORED); @@ -1204,6 +1208,9 @@ int memory_failure(unsigned long pfn, int trapno, int flags) for (ps = error_states;; ps++) if ((p->flags & ps->mask) == ps->res) break; + + page_flags |= (p->flags & (1UL << PG_dirty)); + if (!ps->mask) for (ps = error_states;; ps++) if ((page_flags & ps->mask) == ps->res) @@ -1262,7 +1269,7 @@ void memory_failure_queue(unsigned long pfn, int trapno, int flags) mf_cpu = &get_cpu_var(memory_failure_cpu); spin_lock_irqsave(&mf_cpu->lock, proc_flags); - if (kfifo_put(&mf_cpu->fifo, &entry)) + if (kfifo_put(&mf_cpu->fifo, entry)) schedule_work_on(smp_processor_id(), &mf_cpu->work); else pr_err("Memory failure: buffer overflow when queuing memory failure at %#lx\n", @@ -1339,7 +1346,17 @@ int unpoison_memory(unsigned long pfn) return 0; } - nr_pages = 1 << compound_trans_order(page); + /* + * unpoison_memory() can encounter thp only when the thp is being + * worked by memory_failure() and the page lock is not held yet. + * In such case, we yield to memory_failure() and make unpoison fail. + */ + if (!PageHuge(page) && PageTransHuge(page)) { + pr_info("MCE: Memory failure is now running on %#lx\n", pfn); + return 0; + } + + nr_pages = 1 << compound_order(page); if (!get_page_unless_zero(page)) { /* @@ -1353,7 +1370,7 @@ int unpoison_memory(unsigned long pfn) return 0; } if (TestClearPageHWPoison(p)) - atomic_long_sub(nr_pages, &num_poisoned_pages); + atomic_long_dec(&num_poisoned_pages); pr_info("MCE: Software-unpoisoned free page %#lx\n", pfn); return 0; } @@ -1375,7 +1392,7 @@ int unpoison_memory(unsigned long pfn) unlock_page(page); put_page(page); - if (freeit) + if (freeit && !(pfn == my_zero_pfn(0) && page_count(p) == 1)) put_page(page); return 0; @@ -1406,18 +1423,6 @@ static int __get_any_page(struct page *p, unsigned long pfn, int flags) return 1; /* - * The lock_memory_hotplug prevents a race with memory hotplug. - * This is a big hammer, a better would be nicer. - */ - lock_memory_hotplug(); - - /* - * Isolate the page, so that it doesn't get reallocated if it - * was free. This flag should be kept set until the source page - * is freed and PG_hwpoison on it is set. - */ - set_migratetype_isolate(p, true); - /* * When the target page is a free hugepage, just remove it * from free hugepage list. */ @@ -1437,7 +1442,6 @@ static int __get_any_page(struct page *p, unsigned long pfn, int flags) /* Not a free page */ ret = 1; } - unlock_memory_hotplug(); return ret; } @@ -1470,6 +1474,7 @@ static int soft_offline_huge_page(struct page *page, int flags) int ret; unsigned long pfn = page_to_pfn(page); struct page *hpage = compound_head(page); + LIST_HEAD(pagelist); /* * This double-check of PageHWPoison is to avoid the race with @@ -1485,86 +1490,29 @@ static int soft_offline_huge_page(struct page *page, int flags) unlock_page(hpage); /* Keep page count to indicate a given hugepage is isolated. */ - ret = migrate_huge_page(hpage, new_page, MPOL_MF_MOVE_ALL, - MIGRATE_SYNC); - put_page(hpage); + list_move(&hpage->lru, &pagelist); + ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, + MIGRATE_SYNC, MR_MEMORY_FAILURE); if (ret) { pr_info("soft offline: %#lx: migration failed %d, type %lx\n", pfn, ret, page->flags); + /* + * We know that soft_offline_huge_page() tries to migrate + * only one hugepage pointed to by hpage, so we need not + * run through the pagelist here. + */ + putback_active_hugepage(hpage); + if (ret > 0) + ret = -EIO; } else { set_page_hwpoison_huge_page(hpage); dequeue_hwpoisoned_huge_page(hpage); - atomic_long_add(1 << compound_trans_order(hpage), + atomic_long_add(1 << compound_order(hpage), &num_poisoned_pages); } return ret; } -static int __soft_offline_page(struct page *page, int flags); - -/** - * soft_offline_page - Soft offline a page. - * @page: page to offline - * @flags: flags. Same as memory_failure(). - * - * Returns 0 on success, otherwise negated errno. - * - * Soft offline a page, by migration or invalidation, - * without killing anything. This is for the case when - * a page is not corrupted yet (so it's still valid to access), - * but has had a number of corrected errors and is better taken - * out. - * - * The actual policy on when to do that is maintained by - * user space. - * - * This should never impact any application or cause data loss, - * however it might take some time. - * - * This is not a 100% solution for all memory, but tries to be - * ``good enough'' for the majority of memory. - */ -int soft_offline_page(struct page *page, int flags) -{ - int ret; - unsigned long pfn = page_to_pfn(page); - struct page *hpage = compound_trans_head(page); - - if (PageHWPoison(page)) { - pr_info("soft offline: %#lx page already poisoned\n", pfn); - return -EBUSY; - } - if (!PageHuge(page) && PageTransHuge(hpage)) { - if (PageAnon(hpage) && unlikely(split_huge_page(hpage))) { - pr_info("soft offline: %#lx: failed to split THP\n", - pfn); - return -EBUSY; - } - } - - ret = get_any_page(page, pfn, flags); - if (ret < 0) - return ret; - if (ret) { /* for in-use pages */ - if (PageHuge(page)) - ret = soft_offline_huge_page(page, flags); - else - ret = __soft_offline_page(page, flags); - } else { /* for free pages */ - if (PageHuge(page)) { - set_page_hwpoison_huge_page(hpage); - dequeue_hwpoisoned_huge_page(hpage); - atomic_long_add(1 << compound_trans_order(hpage), - &num_poisoned_pages); - } else { - SetPageHWPoison(page); - atomic_long_inc(&num_poisoned_pages); - } - } - unset_migratetype_isolate(page, MIGRATE_MOVABLE); - return ret; -} - static int __soft_offline_page(struct page *page, int flags) { int ret; @@ -1651,3 +1599,79 @@ static int __soft_offline_page(struct page *page, int flags) } return ret; } + +/** + * soft_offline_page - Soft offline a page. + * @page: page to offline + * @flags: flags. Same as memory_failure(). + * + * Returns 0 on success, otherwise negated errno. + * + * Soft offline a page, by migration or invalidation, + * without killing anything. This is for the case when + * a page is not corrupted yet (so it's still valid to access), + * but has had a number of corrected errors and is better taken + * out. + * + * The actual policy on when to do that is maintained by + * user space. + * + * This should never impact any application or cause data loss, + * however it might take some time. + * + * This is not a 100% solution for all memory, but tries to be + * ``good enough'' for the majority of memory. + */ +int soft_offline_page(struct page *page, int flags) +{ + int ret; + unsigned long pfn = page_to_pfn(page); + struct page *hpage = compound_trans_head(page); + + if (PageHWPoison(page)) { + pr_info("soft offline: %#lx page already poisoned\n", pfn); + return -EBUSY; + } + if (!PageHuge(page) && PageTransHuge(hpage)) { + if (PageAnon(hpage) && unlikely(split_huge_page(hpage))) { + pr_info("soft offline: %#lx: failed to split THP\n", + pfn); + return -EBUSY; + } + } + + /* + * The lock_memory_hotplug prevents a race with memory hotplug. + * This is a big hammer, a better would be nicer. + */ + lock_memory_hotplug(); + + /* + * Isolate the page, so that it doesn't get reallocated if it + * was free. This flag should be kept set until the source page + * is freed and PG_hwpoison on it is set. + */ + if (get_pageblock_migratetype(page) != MIGRATE_ISOLATE) + set_migratetype_isolate(page, true); + + ret = get_any_page(page, pfn, flags); + unlock_memory_hotplug(); + if (ret > 0) { /* for in-use pages */ + if (PageHuge(page)) + ret = soft_offline_huge_page(page, flags); + else + ret = __soft_offline_page(page, flags); + } else if (ret == 0) { /* for free pages */ + if (PageHuge(page)) { + set_page_hwpoison_huge_page(hpage); + dequeue_hwpoisoned_huge_page(hpage); + atomic_long_add(1 << compound_order(hpage), + &num_poisoned_pages); + } else { + SetPageHWPoison(page); + atomic_long_inc(&num_poisoned_pages); + } + } + unset_migratetype_isolate(page, MIGRATE_MOVABLE); + return ret; +} |