From 996ff7a08dae591f5e87852281477d26a83b393c Mon Sep 17 00:00:00 2001 From: Jane Chu Date: Sat, 30 Nov 2019 17:53:35 -0800 Subject: mm/memory-failure.c: clean up around tk pre-allocation add_to_kill() expects the first 'tk' to be pre-allocated by its caller and makes any subsequent allocations itself on an as-needed basis, which makes the code a bit difficult to read. Move all of the allocation inside add_to_kill() and drop the **tkc argument. Link: http://lkml.kernel.org/r/1565112345-28754-2-git-send-email-jane.chu@oracle.com Signed-off-by: Jane Chu Reviewed-by: Dan Williams Acked-by: Naoya Horiguchi Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 40 +++++++++++++--------------------------- 1 file changed, 13 insertions(+), 27 deletions(-) (limited to 'mm/memory-failure.c') diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 3151c87dff73..05c8c6df25e6 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -303,25 +303,19 @@ static unsigned long dev_pagemap_mapping_shift(struct page *page, /* * Schedule a process for later kill. * Uses GFP_ATOMIC allocations to avoid potential recursions in the VM. - * TBD would GFP_NOIO be enough? 
*/ static void add_to_kill(struct task_struct *tsk, struct page *p, struct vm_area_struct *vma, - struct list_head *to_kill, - struct to_kill **tkc) + struct list_head *to_kill) { struct to_kill *tk; - if (*tkc) { - tk = *tkc; - *tkc = NULL; - } else { - tk = kmalloc(sizeof(struct to_kill), GFP_ATOMIC); - if (!tk) { - pr_err("Memory failure: Out of memory while machine check handling\n"); - return; - } + tk = kmalloc(sizeof(struct to_kill), GFP_ATOMIC); + if (!tk) { + pr_err("Memory failure: Out of memory while machine check handling\n"); + return; } + tk->addr = page_address_in_vma(p, vma); if (is_zone_device_page(p)) tk->size_shift = dev_pagemap_mapping_shift(p, vma); @@ -345,6 +339,7 @@ static void add_to_kill(struct task_struct *tsk, struct page *p, kfree(tk); return; } + get_task_struct(tsk); tk->tsk = tsk; list_add_tail(&tk->nd, to_kill); @@ -436,7 +431,7 @@ static struct task_struct *task_early_kill(struct task_struct *tsk, * Collect processes when the error hit an anonymous page. */ static void collect_procs_anon(struct page *page, struct list_head *to_kill, - struct to_kill **tkc, int force_early) + int force_early) { struct vm_area_struct *vma; struct task_struct *tsk; @@ -461,7 +456,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill, if (!page_mapped_in_vma(page, vma)) continue; if (vma->vm_mm == t->mm) - add_to_kill(t, page, vma, to_kill, tkc); + add_to_kill(t, page, vma, to_kill); } } read_unlock(&tasklist_lock); @@ -472,7 +467,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill, * Collect processes when the error hit a file mapped page. */ static void collect_procs_file(struct page *page, struct list_head *to_kill, - struct to_kill **tkc, int force_early) + int force_early) { struct vm_area_struct *vma; struct task_struct *tsk; @@ -496,7 +491,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, * to be informed of all such data corruptions. 
 */ if (vma->vm_mm == t->mm) - add_to_kill(t, page, vma, to_kill, tkc); + add_to_kill(t, page, vma, to_kill); } } read_unlock(&tasklist_lock); @@ -505,26 +500,17 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, /* * Collect the processes who have the corrupted page mapped to kill. - * This is done in two steps for locking reasons. - * First preallocate one tokill structure outside the spin locks, - * so that we can kill at least one process reasonably reliable. */ static void collect_procs(struct page *page, struct list_head *tokill, int force_early) { - struct to_kill *tk; - if (!page->mapping) return; - tk = kmalloc(sizeof(struct to_kill), GFP_NOIO); - if (!tk) - return; if (PageAnon(page)) - collect_procs_anon(page, tokill, &tk, force_early); + collect_procs_anon(page, tokill, force_early); else - collect_procs_file(page, tokill, &tk, force_early); - kfree(tk); + collect_procs_file(page, tokill, force_early); } static const char *action_name[] = { -- cgit v1.2.3 From feec24a6139d4640c6ef344e0271a8cd4d509e60 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Sat, 30 Nov 2019 17:53:38 -0800 Subject: mm, soft-offline: convert parameter to pfn Currently soft_offline_page() receives a struct page, while its sibling memory_failure() receives a pfn. This discrepancy looks odd and makes the pfn validity precheck tricky. So let's align them: make soft_offline_page() take a pfn and do the validity checks itself. 
Link: http://lkml.kernel.org/r/20191016234706.GA5493@www9186uo.sakura.ne.jp Signed-off-by: Naoya Horiguchi Acked-by: Andrew Morton Cc: David Hildenbrand Cc: Michal Hocko Cc: Oscar Salvador Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/memory.c | 7 +------ include/linux/mm.h | 2 +- mm/madvise.c | 2 +- mm/memory-failure.c | 19 +++++++++---------- 4 files changed, 12 insertions(+), 18 deletions(-) (limited to 'mm/memory-failure.c') diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 84c4e1f72cbd..d65ecdeb83e8 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -538,12 +538,7 @@ static ssize_t soft_offline_page_store(struct device *dev, if (kstrtoull(buf, 0, &pfn) < 0) return -EINVAL; pfn >>= PAGE_SHIFT; - if (!pfn_valid(pfn)) - return -ENXIO; - /* Only online pages can be soft-offlined (esp., not ZONE_DEVICE). */ - if (!pfn_to_online_page(pfn)) - return -EIO; - ret = soft_offline_page(pfn_to_page(pfn), 0); + ret = soft_offline_page(pfn, 0); return ret == 0 ? 
count : ret; } diff --git a/include/linux/mm.h b/include/linux/mm.h index 06b51d8728ec..19a0e687878a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2773,7 +2773,7 @@ extern int sysctl_memory_failure_early_kill; extern int sysctl_memory_failure_recovery; extern void shake_page(struct page *p, int access); extern atomic_long_t num_poisoned_pages __read_mostly; -extern int soft_offline_page(struct page *page, int flags); +extern int soft_offline_page(unsigned long pfn, int flags); /* diff --git a/mm/madvise.c b/mm/madvise.c index 94c343b4c968..63e130800570 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -895,7 +895,7 @@ static int madvise_inject_error(int behavior, pr_info("Soft offlining pfn %#lx at process virtual address %#lx\n", pfn, start); - ret = soft_offline_page(page, MF_COUNT_INCREASED); + ret = soft_offline_page(pfn, MF_COUNT_INCREASED); if (ret) return ret; continue; diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 05c8c6df25e6..af2712004a4d 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1476,7 +1476,7 @@ static void memory_failure_work_func(struct work_struct *work) if (!gotten) break; if (entry.flags & MF_SOFT_OFFLINE) - soft_offline_page(pfn_to_page(entry.pfn), entry.flags); + soft_offline_page(entry.pfn, entry.flags); else memory_failure(entry.pfn, entry.flags); } @@ -1857,7 +1857,7 @@ static int soft_offline_free_page(struct page *page) /** * soft_offline_page - Soft offline a page. - * @page: page to offline + * @pfn: pfn to soft-offline * @flags: flags. Same as memory_failure(). * * Returns 0 on success, otherwise negated errno. @@ -1877,18 +1877,17 @@ static int soft_offline_free_page(struct page *page) * This is not a 100% solution for all memory, but tries to be * ``good enough'' for the majority of memory. 
 */ -int soft_offline_page(struct page *page, int flags) +int soft_offline_page(unsigned long pfn, int flags) { int ret; - unsigned long pfn = page_to_pfn(page); + struct page *page; - if (is_zone_device_page(page)) { - pr_debug_ratelimited("soft_offline: %#lx page is device page\n", - pfn); - if (flags & MF_COUNT_INCREASED) - put_page(page); + if (!pfn_valid(pfn)) + return -ENXIO; + /* Only online pages can be soft-offlined (esp., not ZONE_DEVICE). */ + page = pfn_to_online_page(pfn); + if (!page) return -EIO; - } if (PageHWPoison(page)) { pr_info("soft offline: %#lx page already poisoned\n", pfn); -- cgit v1.2.3 From 7506851837350e112685ddf4d13ba03a558f9e20 Mon Sep 17 00:00:00 2001 From: Yunfeng Ye Date: Sat, 30 Nov 2019 17:53:41 -0800 Subject: mm/memory-failure.c: use page_shift() in add_to_kill() page_shift() has been available since commit 94ad9338109f ("mm: introduce page_shift()"). Use it in add_to_kill() in place of the open-coded "compound_order() + PAGE_SHIFT" for readability. Link: http://lkml.kernel.org/r/543d8bc9-f2e7-3023-7c35-2e7ed67c0e82@huawei.com Signed-off-by: Yunfeng Ye Reviewed-by: David Hildenbrand Acked-by: Naoya Horiguchi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory-failure.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/memory-failure.c') diff --git a/mm/memory-failure.c b/mm/memory-failure.c index af2712004a4d..41c634f45d45 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -320,7 +320,7 @@ static void add_to_kill(struct task_struct *tsk, struct page *p, if (is_zone_device_page(p)) tk->size_shift = dev_pagemap_mapping_shift(p, vma); else - tk->size_shift = compound_order(compound_head(p)) + PAGE_SHIFT; + tk->size_shift = page_shift(compound_head(p)); /* * Send SIGKILL if "tk->addr == -EFAULT". Also, as -- cgit v1.2.3