From 7d2eba0557c18f7522b98befed98799990dd4fdb Mon Sep 17 00:00:00 2001 From: Ebru Akagunduz Date: Thu, 14 Jan 2016 15:22:19 -0800 Subject: mm: add tracepoint for scanning pages This patch series makes swapin readahead up to a certain number to gain more thp performance and adds tracepoint for khugepaged_scan_pmd, collapse_huge_page, __collapse_huge_page_isolate. This patch series was written to deal with programs that access most, but not all, of their memory after they get swapped out. Currently these programs do not get their memory collapsed into THPs after the system swapped their memory out, while they would get THPs before swapping happened. This patch series was tested with a test program, it allocates 400MB of memory, writes to it, and then sleeps. I force the system to swap out all. Afterwards, the test program touches the area by writing and leaves a piece of it without writing. This shows how much swap in readahead made by the patch. Test results: After swapped out ------------------------------------------------------------------- | Anonymous | AnonHugePages | Swap | Fraction | ------------------------------------------------------------------- With patch | 90076 kB | 88064 kB | 309928 kB | %99 | ------------------------------------------------------------------- Without patch | 194068 kB | 192512 kB | 205936 kB | %99 | ------------------------------------------------------------------- After swapped in ------------------------------------------------------------------- | Anonymous | AnonHugePages | Swap | Fraction | ------------------------------------------------------------------- With patch | 201408 kB | 198656 kB | 198596 kB | %98 | ------------------------------------------------------------------- Without patch | 292624 kB | 192512 kB | 107380 kB | %65 | ------------------------------------------------------------------- This patch (of 3): Using static tracepoints, data of functions is recorded. It is good to automatize debugging without doing a lot of changes in the source code. This patch adds tracepoint for khugepaged_scan_pmd, collapse_huge_page and __collapse_huge_page_isolate. [dan.carpenter@oracle.com: add a missing tab] Signed-off-by: Ebru Akagunduz Acked-by: Kirill A. Shutemov Acked-by: Rik van Riel Cc: Naoya Horiguchi Cc: Andrea Arcangeli Cc: Joonsoo Kim Cc: Xie XiuQi Cc: Cyrill Gorcunov Cc: Mel Gorman Cc: David Rientjes Cc: Vlastimil Babka Cc: Aneesh Kumar K.V Cc: Hugh Dickins Cc: Johannes Weiner Cc: Michal Hocko Signed-off-by: Dan Carpenter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/huge_memory.h | 136 +++++++++++++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 include/trace/events/huge_memory.h (limited to 'include/trace/events/huge_memory.h') diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h new file mode 100644 index 000000000000..97d635cabac8 --- /dev/null +++ b/include/trace/events/huge_memory.h @@ -0,0 +1,136 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM huge_memory + +#if !defined(__HUGE_MEMORY_H) || defined(TRACE_HEADER_MULTI_READ) +#define __HUGE_MEMORY_H + +#include + +#include + +#define SCAN_STATUS \ + EM( SCAN_FAIL, "failed") \ + EM( SCAN_SUCCEED, "succeeded") \ + EM( SCAN_PMD_NULL, "pmd_null") \ + EM( SCAN_EXCEED_NONE_PTE, "exceed_none_pte") \ + EM( SCAN_PTE_NON_PRESENT, "pte_non_present") \ + EM( SCAN_PAGE_RO, "no_writable_page") \ + EM( SCAN_NO_REFERENCED_PAGE, "no_referenced_page") \ + EM( SCAN_PAGE_NULL, "page_null") \ + EM( SCAN_SCAN_ABORT, "scan_aborted") \ + EM( SCAN_PAGE_COUNT, "not_suitable_page_count") \ + EM( SCAN_PAGE_LRU, "page_not_in_lru") \ + EM( SCAN_PAGE_LOCK, "page_locked") \ + EM( SCAN_PAGE_ANON, "page_not_anon") \ + EM( SCAN_ANY_PROCESS, "no_process_for_page") \ + EM( SCAN_VMA_NULL, "vma_null") \ + EM( SCAN_VMA_CHECK, "vma_check_failed") \ + EM( SCAN_ADDRESS_RANGE, "not_suitable_address_range") \ + EM( SCAN_SWAP_CACHE_PAGE, "page_swap_cache") \ + EM( SCAN_DEL_PAGE_LRU, "could_not_delete_page_from_lru")\ + EM( SCAN_ALLOC_HUGE_PAGE_FAIL, "alloc_huge_page_failed") \ + EMe( SCAN_CGROUP_CHARGE_FAIL, "ccgroup_charge_failed") + +#undef EM +#undef EMe +#define EM(a, b) TRACE_DEFINE_ENUM(a); +#define EMe(a, b) TRACE_DEFINE_ENUM(a); + +SCAN_STATUS + +#undef EM +#undef EMe +#define EM(a, b) {a, b}, +#define EMe(a, b) {a, b} + +TRACE_EVENT(mm_khugepaged_scan_pmd, + + TP_PROTO(struct mm_struct *mm, unsigned long pfn, bool writable, + bool referenced, int none_or_zero, int status), + + TP_ARGS(mm, pfn, writable, referenced, none_or_zero, status), + + TP_STRUCT__entry( + __field(struct mm_struct *, mm) + __field(unsigned long, pfn) + __field(bool, writable) + __field(bool, referenced) + __field(int, none_or_zero) + __field(int, status) + ), + + TP_fast_assign( + __entry->mm = mm; + __entry->pfn = pfn; + __entry->writable = writable; + __entry->referenced = referenced; + __entry->none_or_zero = none_or_zero; + __entry->status = status; + ), + + TP_printk("mm=%p, scan_pfn=0x%lx, writable=%d, referenced=%d, none_or_zero=%d, status=%s", + __entry->mm, + __entry->pfn, + __entry->writable, + __entry->referenced, + __entry->none_or_zero, + __print_symbolic(__entry->status, SCAN_STATUS)) +); + +TRACE_EVENT(mm_collapse_huge_page, + + TP_PROTO(struct mm_struct *mm, int isolated, int status), + + TP_ARGS(mm, isolated, status), + + TP_STRUCT__entry( + __field(struct mm_struct *, mm) + __field(int, isolated) + __field(int, status) + ), + + TP_fast_assign( + __entry->mm = mm; + __entry->isolated = isolated; + __entry->status = status; + ), + + TP_printk("mm=%p, isolated=%d, status=%s", + __entry->mm, + __entry->isolated, + __print_symbolic(__entry->status, SCAN_STATUS)) +); + +TRACE_EVENT(mm_collapse_huge_page_isolate, + + TP_PROTO(unsigned long pfn, int none_or_zero, + bool referenced, bool writable, int status), + + TP_ARGS(pfn, none_or_zero, referenced, writable, status), + + TP_STRUCT__entry( + __field(unsigned long, pfn) + __field(int, none_or_zero) + __field(bool, referenced) + __field(bool, writable) + __field(int, status) + ), + + TP_fast_assign( + __entry->pfn = pfn; + __entry->none_or_zero = none_or_zero; + __entry->referenced = referenced; + __entry->writable = writable; + __entry->status = status; + ), + + TP_printk("scan_pfn=0x%lx, none_or_zero=%d, referenced=%d, writable=%d, status=%s", + __entry->pfn, + __entry->none_or_zero, + __entry->referenced, + __entry->writable, + __print_symbolic(__entry->status, SCAN_STATUS)) +); + +#endif /* __HUGE_MEMORY_H */ +#include -- cgit v1.2.3 From b1caa957ae6da3142a73ba8c5c9b2ca821021f0f Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 15 Jan 2016 16:52:39 -0800 Subject: khugepaged: ignore pmd tables with THP mapped with ptes Prepare khugepaged to see compound pages mapped with pte. For now we won't collapse the pmd table with such pte. khugepaged is subject for future rework wrt new refcounting. Signed-off-by: Kirill A. Shutemov Tested-by: Sasha Levin Tested-by: Aneesh Kumar K.V Acked-by: Jerome Marchand Acked-by: Vlastimil Babka Cc: Andrea Arcangeli Cc: Hugh Dickins Cc: Dave Hansen Cc: Mel Gorman Cc: Rik van Riel Cc: Naoya Horiguchi Cc: Steve Capper Cc: Johannes Weiner Cc: Michal Hocko Cc: Christoph Lameter Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/huge_memory.h | 1 + mm/huge_memory.c | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/trace/events/huge_memory.h') diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h index 97d635cabac8..0f803d2783e3 100644 --- a/include/trace/events/huge_memory.h +++ b/include/trace/events/huge_memory.h @@ -22,6 +22,7 @@ EM( SCAN_PAGE_LRU, "page_not_in_lru") \ EM( SCAN_PAGE_LOCK, "page_locked") \ EM( SCAN_PAGE_ANON, "page_not_anon") \ + EM( SCAN_PAGE_COMPOUND, "page_compound") \ EM( SCAN_ANY_PROCESS, "no_process_for_page") \ EM( SCAN_VMA_NULL, "vma_null") \ EM( SCAN_VMA_CHECK, "vma_check_failed") \ diff --git a/mm/huge_memory.c b/mm/huge_memory.c index b3cc9f27a0ee..f4da89cef2cd 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -45,6 +45,7 @@ enum scan_result { SCAN_PAGE_LRU, SCAN_PAGE_LOCK, SCAN_PAGE_ANON, + SCAN_PAGE_COMPOUND, SCAN_ANY_PROCESS, SCAN_VMA_NULL, SCAN_VMA_CHECK, @@ -2770,6 +2771,13 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, result = SCAN_PAGE_NULL; goto out_unmap; } + + /* TODO: teach khugepaged to collapse THP mapped with pte */ + if (PageCompound(page)) { + result = SCAN_PAGE_COMPOUND; + goto out_unmap; + } + /* * Record which node the original page is from and save this * information to khugepaged_node_load[]. @@ -2782,7 +2790,6 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, goto out_unmap; } khugepaged_node_load[node]++; - VM_BUG_ON_PAGE(PageCompound(page), page); if (!PageLRU(page)) { result = SCAN_SCAN_ABORT; goto out_unmap; -- cgit v1.2.3 From 16fd0fe4aa92d0d621ecfe21de86f7fdcfa41947 Mon Sep 17 00:00:00 2001 From: yalin wang Date: Thu, 21 Jan 2016 16:40:30 -0800 Subject: mm: fix kernel crash in khugepaged thread This crash is caused by NULL pointer deference, in page_to_pfn() marco, when page == NULL : Unable to handle kernel NULL pointer dereference at virtual address 00000000 Internal error: Oops: 94000006 [#1] SMP Modules linked in: CPU: 1 PID: 26 Comm: khugepaged Tainted: G W 4.3.0-rc6-next-20151022ajb-00001-g32f3386-dirty #3 PC is at khugepaged+0x378/0x1af8 LR is at khugepaged+0x418/0x1af8 Process khugepaged (pid: 26, stack limit = 0xffffffc079638020) Call trace: khugepaged+0x378/0x1af8 kthread+0xdc/0xf4 ret_from_fork+0xc/0x40 Code: 35001700 f0002c60 aa0703e3 f9009fa0 (f94000e0) ---[ end trace 637503d8e28ae69e ]--- Kernel panic - not syncing: Fatal exception CPU2: stopping CPU: 2 PID: 0 Comm: swapper/2 Tainted: G D W 4.3.0-rc6-next-20151022ajb-00001-g32f3386-dirty #3 Hardware name: linux,dummy-virt (DT) [akpm@linux-foundation.org: fix fat-fingered merge resolution] Signed-off-by: yalin wang Acked-by: Vlastimil Babka Acked-by: Kirill A. Shutemov Acked-by: David Rientjes Cc: Cyrill Gorcunov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/huge_memory.h | 12 ++++++------ mm/huge_memory.c | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/trace/events/huge_memory.h') diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h index 0f803d2783e3..47c6212d8f3c 100644 --- a/include/trace/events/huge_memory.h +++ b/include/trace/events/huge_memory.h @@ -46,10 +46,10 @@ SCAN_STATUS TRACE_EVENT(mm_khugepaged_scan_pmd, - TP_PROTO(struct mm_struct *mm, unsigned long pfn, bool writable, + TP_PROTO(struct mm_struct *mm, struct page *page, bool writable, bool referenced, int none_or_zero, int status), - TP_ARGS(mm, pfn, writable, referenced, none_or_zero, status), + TP_ARGS(mm, page, writable, referenced, none_or_zero, status), TP_STRUCT__entry( __field(struct mm_struct *, mm) @@ -62,7 +62,7 @@ TRACE_EVENT(mm_khugepaged_scan_pmd, TP_fast_assign( __entry->mm = mm; - __entry->pfn = pfn; + __entry->pfn = page ? page_to_pfn(page) : -1; __entry->writable = writable; __entry->referenced = referenced; __entry->none_or_zero = none_or_zero; @@ -104,10 +104,10 @@ TRACE_EVENT(mm_collapse_huge_page, TRACE_EVENT(mm_collapse_huge_page_isolate, - TP_PROTO(unsigned long pfn, int none_or_zero, + TP_PROTO(struct page *page, int none_or_zero, bool referenced, bool writable, int status), - TP_ARGS(pfn, none_or_zero, referenced, writable, status), + TP_ARGS(page, none_or_zero, referenced, writable, status), TP_STRUCT__entry( __field(unsigned long, pfn) @@ -118,7 +118,7 @@ TRACE_EVENT(mm_collapse_huge_page_isolate, ), TP_fast_assign( - __entry->pfn = pfn; + __entry->pfn = page ? page_to_pfn(page) : -1; __entry->none_or_zero = none_or_zero; __entry->referenced = referenced; __entry->writable = writable; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 2d1ffe9d0e26..fd3a07b3e6f4 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2072,7 +2072,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, if (likely(writable)) { if (likely(referenced)) { result = SCAN_SUCCEED; - trace_mm_collapse_huge_page_isolate(page_to_pfn(page), none_or_zero, + trace_mm_collapse_huge_page_isolate(page, none_or_zero, referenced, writable, result); return 1; } @@ -2082,7 +2082,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, out: release_pte_pages(pte, _pte); - trace_mm_collapse_huge_page_isolate(page_to_pfn(page), none_or_zero, + trace_mm_collapse_huge_page_isolate(page, none_or_zero, referenced, writable, result); return 0; } @@ -2580,7 +2580,7 @@ out_unmap: collapse_huge_page(mm, address, hpage, vma, node); } out: - trace_mm_khugepaged_scan_pmd(mm, page_to_pfn(page), writable, referenced, + trace_mm_khugepaged_scan_pmd(mm, page, writable, referenced, none_or_zero, result); return ret; } -- cgit v1.2.3