summaryrefslogtreecommitdiff
path: root/mm/memory-failure.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memory-failure.c')
-rw-r--r--mm/memory-failure.c183
1 files changed, 57 insertions, 126 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index b91a33fb6c69..3edebb0cda30 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -212,106 +212,34 @@ static bool page_handle_poison(struct page *page, bool hugepage_or_freepage, boo
return true;
}
-#if IS_ENABLED(CONFIG_HWPOISON_INJECT)
+static hwpoison_filter_func_t __rcu *hwpoison_filter_func __read_mostly;
-u32 hwpoison_filter_enable = 0;
-u32 hwpoison_filter_dev_major = ~0U;
-u32 hwpoison_filter_dev_minor = ~0U;
-u64 hwpoison_filter_flags_mask;
-u64 hwpoison_filter_flags_value;
-EXPORT_SYMBOL_GPL(hwpoison_filter_enable);
-EXPORT_SYMBOL_GPL(hwpoison_filter_dev_major);
-EXPORT_SYMBOL_GPL(hwpoison_filter_dev_minor);
-EXPORT_SYMBOL_GPL(hwpoison_filter_flags_mask);
-EXPORT_SYMBOL_GPL(hwpoison_filter_flags_value);
-
-static int hwpoison_filter_dev(struct page *p)
+void hwpoison_filter_register(hwpoison_filter_func_t *filter)
{
- struct folio *folio = page_folio(p);
- struct address_space *mapping;
- dev_t dev;
-
- if (hwpoison_filter_dev_major == ~0U &&
- hwpoison_filter_dev_minor == ~0U)
- return 0;
-
- mapping = folio_mapping(folio);
- if (mapping == NULL || mapping->host == NULL)
- return -EINVAL;
-
- dev = mapping->host->i_sb->s_dev;
- if (hwpoison_filter_dev_major != ~0U &&
- hwpoison_filter_dev_major != MAJOR(dev))
- return -EINVAL;
- if (hwpoison_filter_dev_minor != ~0U &&
- hwpoison_filter_dev_minor != MINOR(dev))
- return -EINVAL;
-
- return 0;
+ rcu_assign_pointer(hwpoison_filter_func, filter);
}
+EXPORT_SYMBOL_GPL(hwpoison_filter_register);
-static int hwpoison_filter_flags(struct page *p)
+void hwpoison_filter_unregister(void)
{
- if (!hwpoison_filter_flags_mask)
- return 0;
-
- if ((stable_page_flags(p) & hwpoison_filter_flags_mask) ==
- hwpoison_filter_flags_value)
- return 0;
- else
- return -EINVAL;
+ RCU_INIT_POINTER(hwpoison_filter_func, NULL);
+ synchronize_rcu();
}
+EXPORT_SYMBOL_GPL(hwpoison_filter_unregister);
-/*
- * This allows stress tests to limit test scope to a collection of tasks
- * by putting them under some memcg. This prevents killing unrelated/important
- * processes such as /sbin/init. Note that the target task may share clean
- * pages with init (eg. libc text), which is harmless. If the target task
- * share _dirty_ pages with another task B, the test scheme must make sure B
- * is also included in the memcg. At last, due to race conditions this filter
- * can only guarantee that the page either belongs to the memcg tasks, or is
- * a freed page.
- */
-#ifdef CONFIG_MEMCG
-u64 hwpoison_filter_memcg;
-EXPORT_SYMBOL_GPL(hwpoison_filter_memcg);
-static int hwpoison_filter_task(struct page *p)
+static int hwpoison_filter(struct page *p)
{
- if (!hwpoison_filter_memcg)
- return 0;
-
- if (page_cgroup_ino(p) != hwpoison_filter_memcg)
- return -EINVAL;
-
- return 0;
-}
-#else
-static int hwpoison_filter_task(struct page *p) { return 0; }
-#endif
-
-int hwpoison_filter(struct page *p)
-{
- if (!hwpoison_filter_enable)
- return 0;
-
- if (hwpoison_filter_dev(p))
- return -EINVAL;
-
- if (hwpoison_filter_flags(p))
- return -EINVAL;
+ int ret = 0;
+ hwpoison_filter_func_t *filter;
- if (hwpoison_filter_task(p))
- return -EINVAL;
+ rcu_read_lock();
+ filter = rcu_dereference(hwpoison_filter_func);
+ if (filter)
+ ret = filter(p);
+ rcu_read_unlock();
- return 0;
-}
-EXPORT_SYMBOL_GPL(hwpoison_filter);
-#else
-int hwpoison_filter(struct page *p)
-{
- return 0;
+ return ret;
}
-#endif
/*
* Kill all processes that have a poisoned page mapped and then isolate
@@ -837,19 +765,33 @@ static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask,
struct mm_walk *walk)
{
struct hwpoison_walk *hwp = walk->private;
- pte_t pte = huge_ptep_get(walk->mm, addr, ptep);
struct hstate *h = hstate_vma(walk->vma);
+ spinlock_t *ptl;
+ pte_t pte;
+ int ret;
- return check_hwpoisoned_entry(pte, addr, huge_page_shift(h),
- hwp->pfn, &hwp->tk);
+ ptl = huge_pte_lock(h, walk->mm, ptep);
+ pte = huge_ptep_get(walk->mm, addr, ptep);
+ ret = check_hwpoisoned_entry(pte, addr, huge_page_shift(h),
+ hwp->pfn, &hwp->tk);
+ spin_unlock(ptl);
+ return ret;
}
#else
#define hwpoison_hugetlb_range NULL
#endif
+static int hwpoison_test_walk(unsigned long start, unsigned long end,
+ struct mm_walk *walk)
+{
+ /* We also want to consider pages mapped into VM_PFNMAP. */
+ return 0;
+}
+
static const struct mm_walk_ops hwpoison_walk_ops = {
.pmd_entry = hwpoison_pte_range,
.hugetlb_entry = hwpoison_hugetlb_range,
+ .test_walk = hwpoison_test_walk,
.walk_lock = PGWALK_RDLOCK,
};
@@ -942,7 +884,7 @@ static const char * const action_page_types[] = {
[MF_MSG_BUDDY] = "free buddy page",
[MF_MSG_DAX] = "dax page",
[MF_MSG_UNSPLIT_THP] = "unsplit thp",
- [MF_MSG_ALREADY_POISONED] = "already poisoned",
+ [MF_MSG_ALREADY_POISONED] = "already poisoned page",
[MF_MSG_UNKNOWN] = "unknown page",
};
@@ -1185,7 +1127,7 @@ static int me_swapcache_clean(struct page_state *ps, struct page *p)
struct folio *folio = page_folio(p);
int ret;
- delete_from_swap_cache(folio);
+ swap_cache_del_folio(folio);
ret = delete_from_lru_cache(folio) ? MF_FAILED : MF_RECOVERED;
folio_unlock(folio);
@@ -1335,9 +1277,10 @@ static int action_result(unsigned long pfn, enum mf_action_page_type type,
{
trace_memory_failure_event(pfn, type, result);
- num_poisoned_pages_inc(pfn);
-
- update_per_node_mf_stats(pfn, result);
+ if (type != MF_MSG_ALREADY_POISONED) {
+ num_poisoned_pages_inc(pfn);
+ update_per_node_mf_stats(pfn, result);
+ }
pr_err("%#lx: recovery action for %s: %s\n",
pfn, action_page_types[type], action_name[result]);
@@ -1388,8 +1331,8 @@ static inline bool HWPoisonHandlable(struct page *page, unsigned long flags)
if (PageSlab(page))
return false;
- /* Soft offline could migrate non-LRU movable pages */
- if ((flags & MF_SOFT_OFFLINE) && __PageMovable(page))
+ /* Soft offline could migrate movable_ops pages */
+ if ((flags & MF_SOFT_OFFLINE) && page_has_movable_ops(page))
return true;
return PageLRU(page) || is_free_buddy_page(page);
@@ -1561,6 +1504,10 @@ static int get_hwpoison_page(struct page *p, unsigned long flags)
return ret;
}
+/*
+ * The caller must guarantee the folio isn't large folio, except hugetlb.
+ * try_to_unmap() can't handle it.
+ */
int unmap_poisoned_folio(struct folio *folio, unsigned long pfn, bool must_kill)
{
enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_SYNC | TTU_HWPOISON;
@@ -1689,10 +1636,10 @@ static int identify_page_state(unsigned long pfn, struct page *p,
* carried out only if the first check can't determine the page status.
*/
for (ps = error_states;; ps++)
- if ((p->flags & ps->mask) == ps->res)
+ if ((p->flags.f & ps->mask) == ps->res)
break;
- page_flags |= (p->flags & (1UL << PG_dirty));
+ page_flags |= (p->flags.f & (1UL << PG_dirty));
if (!ps->mask)
for (ps = error_states;; ps++)
@@ -2076,12 +2023,11 @@ retry:
*hugetlb = 0;
return 0;
} else if (res == -EHWPOISON) {
- pr_err("%#lx: already hardware poisoned\n", pfn);
if (flags & MF_ACTION_REQUIRED) {
folio = page_folio(p);
res = kill_accessing_process(current, folio_pfn(folio), flags);
- action_result(pfn, MF_MSG_ALREADY_POISONED, MF_FAILED);
}
+ action_result(pfn, MF_MSG_ALREADY_POISONED, MF_FAILED);
return res;
} else if (res == -EBUSY) {
if (!(flags & MF_NO_RETRY)) {
@@ -2119,7 +2065,7 @@ retry:
return action_result(pfn, MF_MSG_FREE_HUGE, res);
}
- page_flags = folio->flags;
+ page_flags = folio->flags.f;
if (!hwpoison_user_mappings(folio, p, pfn, flags)) {
folio_unlock(folio);
@@ -2248,7 +2194,7 @@ int memory_failure(unsigned long pfn, int flags)
goto unlock_mutex;
if (pfn_valid(pfn)) {
- pgmap = get_dev_pagemap(pfn, NULL);
+ pgmap = get_dev_pagemap(pfn);
put_ref_page(pfn, flags);
if (pgmap) {
res = memory_failure_dev_pagemap(pfn, flags,
@@ -2267,7 +2213,6 @@ try_again:
goto unlock_mutex;
if (TestSetPageHWPoison(p)) {
- pr_err("%#lx: already hardware poisoned\n", pfn);
res = -EHWPOISON;
if (flags & MF_ACTION_REQUIRED)
res = kill_accessing_process(current, pfn, flags);
@@ -2380,7 +2325,7 @@ try_again:
* folio_remove_rmap_*() in try_to_unmap_one(). So to determine page
* status correctly, we save a copy of the page flags at this time.
*/
- page_flags = folio->flags;
+ page_flags = folio->flags.f;
/*
* __munlock_folio() may clear a writeback folio's LRU flag without
@@ -2503,19 +2448,6 @@ static void memory_failure_work_func(struct work_struct *work)
}
}
-/*
- * Process memory_failure work queued on the specified CPU.
- * Used to avoid return-to-userspace racing with the memory_failure workqueue.
- */
-void memory_failure_queue_kick(int cpu)
-{
- struct memory_failure_cpu *mf_cpu;
-
- mf_cpu = &per_cpu(memory_failure_cpu, cpu);
- cancel_work_sync(&mf_cpu->work);
- memory_failure_work_func(&mf_cpu->work);
-}
-
static int __init memory_failure_init(void)
{
struct memory_failure_cpu *mf_cpu;
@@ -2564,10 +2496,9 @@ int unpoison_memory(unsigned long pfn)
static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
- if (!pfn_valid(pfn))
- return -ENXIO;
-
- p = pfn_to_page(pfn);
+ p = pfn_to_online_page(pfn);
+ if (!p)
+ return -EIO;
folio = page_folio(p);
mutex_lock(&mf_mutex);
@@ -2739,13 +2670,13 @@ static int soft_offline_in_use_page(struct page *page)
putback_movable_pages(&pagelist);
pr_info("%#lx: %s migration failed %ld, type %pGp\n",
- pfn, msg_page[huge], ret, &page->flags);
+ pfn, msg_page[huge], ret, &page->flags.f);
if (ret > 0)
ret = -EBUSY;
}
} else {
pr_info("%#lx: %s isolation failed, page count %d, type %pGp\n",
- pfn, msg_page[huge], page_count(page), &page->flags);
+ pfn, msg_page[huge], page_count(page), &page->flags.f);
ret = -EBUSY;
}
return ret;