From f0b791a34cb3cffd2bbc3ca4365c9b719fa2c9f3 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 23 Jan 2014 15:52:49 -0800 Subject: mm: print more details for bad_page() bad_page() is cool in that it prints out a bunch of data about the page. But, I can never remember which page flags are good and which are bad, or whether ->index or ->mapping is required to be NULL. This patch allows bad/dump_page() callers to specify a string about why they are dumping the page and adds explanation strings to a number of places. It also adds a 'bad_flags' argument to bad_page(), which it then dumps out separately from the flags which are actually set. This way, the messages will show specifically why the page was bad, *specifically* which flags it is complaining about, if it was a page flag combination which was the problem. [akpm@linux-foundation.org: switch to pr_alert] Signed-off-by: Dave Hansen Reviewed-by: Christoph Lameter Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index a512dd836931..03bbcb84d96e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2029,7 +2029,9 @@ extern void shake_page(struct page *p, int access); extern atomic_long_t num_poisoned_pages; extern int soft_offline_page(struct page *page, int flags); -extern void dump_page(struct page *page); +extern void dump_page(struct page *page, char *reason); +extern void dump_page_badflags(struct page *page, char *reason, + unsigned long badflags); #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS) extern void clear_huge_page(struct page *page, -- cgit v1.2.3 From 309381feaee564281c3d9e90fbca8963bb7428ad Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 23 Jan 2014 15:52:54 -0800 Subject: mm: dump page when hitting a VM_BUG_ON using VM_BUG_ON_PAGE Most of the VM_BUG_ON assertions are performed 
on a page. Usually, when one of these assertions fails we'll get a BUG_ON with a call stack and the registers. I've recently noticed based on the requests to add a small piece of code that dumps the page to various VM_BUG_ON sites that the page dump is quite useful to people debugging issues in mm. This patch adds a VM_BUG_ON_PAGE(cond, page) which beyond doing what VM_BUG_ON() does, also dumps the page before executing the actual BUG_ON. [akpm@linux-foundation.org: fix up includes] Signed-off-by: Sasha Levin Cc: "Kirill A. Shutemov" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/gup.c | 8 ++++---- include/linux/gfp.h | 1 + include/linux/hugetlb.h | 3 ++- include/linux/hugetlb_cgroup.h | 5 +++-- include/linux/mm.h | 29 +++++++++++++---------------- include/linux/mmdebug.h | 9 +++++++++ include/linux/page-flags.h | 10 +++++----- include/linux/pagemap.h | 10 +++++----- include/linux/percpu.h | 1 + mm/cleancache.c | 6 +++--- mm/compaction.c | 2 +- mm/filemap.c | 16 ++++++++-------- mm/huge_memory.c | 36 ++++++++++++++++++------------------ mm/hugetlb.c | 10 +++++----- mm/hugetlb_cgroup.c | 2 +- mm/internal.h | 10 +++++----- mm/ksm.c | 12 ++++++------ mm/memcontrol.c | 28 ++++++++++++++-------------- mm/memory.c | 8 ++++---- mm/migrate.c | 6 +++--- mm/mlock.c | 4 ++-- mm/page_alloc.c | 21 +++++++++++---------- mm/page_io.c | 4 ++-- mm/rmap.c | 10 +++++----- mm/shmem.c | 8 ++++---- mm/slub.c | 12 ++++++------ mm/swap.c | 36 ++++++++++++++++++------------------ mm/swap_state.c | 16 ++++++++-------- mm/swapfile.c | 8 ++++---- mm/vmscan.c | 20 ++++++++++---------- 30 files changed, 181 insertions(+), 170 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 0596e8e0cc19..207d9aef662d 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -108,8 +108,8 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, static inline void get_head_page_multiple(struct page *page, int nr) { - 
VM_BUG_ON(page != compound_head(page)); - VM_BUG_ON(page_count(page) == 0); + VM_BUG_ON_PAGE(page != compound_head(page), page); + VM_BUG_ON_PAGE(page_count(page) == 0, page); atomic_add(nr, &page->_count); SetPageReferenced(page); } @@ -135,7 +135,7 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, head = pte_page(pte); page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); do { - VM_BUG_ON(compound_head(page) != head); + VM_BUG_ON_PAGE(compound_head(page) != head, page); pages[*nr] = page; if (PageTail(page)) get_huge_page_tail(page); @@ -212,7 +212,7 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr, head = pte_page(pte); page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); do { - VM_BUG_ON(compound_head(page) != head); + VM_BUG_ON_PAGE(compound_head(page) != head, page); pages[*nr] = page; if (PageTail(page)) get_huge_page_tail(page); diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 9b4dd491f7e8..0437439bc047 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -1,6 +1,7 @@ #ifndef __LINUX_GFP_H #define __LINUX_GFP_H +#include #include #include #include diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index d01cc972a1d9..8c43cc469d78 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -2,6 +2,7 @@ #define _LINUX_HUGETLB_H #include +#include #include #include #include @@ -354,7 +355,7 @@ static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, static inline struct hstate *page_hstate(struct page *page) { - VM_BUG_ON(!PageHuge(page)); + VM_BUG_ON_PAGE(!PageHuge(page), page); return size_to_hstate(PAGE_SIZE << compound_order(page)); } diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index ce8217f7b5c2..787bba3bf552 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -15,6 +15,7 @@ #ifndef _LINUX_HUGETLB_CGROUP_H #define _LINUX_HUGETLB_CGROUP_H +#include #include struct hugetlb_cgroup; @@ -28,7 +29,7 @@ 
struct hugetlb_cgroup; static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page) { - VM_BUG_ON(!PageHuge(page)); + VM_BUG_ON_PAGE(!PageHuge(page), page); if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER) return NULL; @@ -38,7 +39,7 @@ static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page) static inline int set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg) { - VM_BUG_ON(!PageHuge(page)); + VM_BUG_ON_PAGE(!PageHuge(page), page); if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER) return -1; diff --git a/include/linux/mm.h b/include/linux/mm.h index 03bbcb84d96e..d9992fc128ca 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -5,6 +5,7 @@ #ifdef __KERNEL__ +#include #include #include #include @@ -303,7 +304,7 @@ static inline int get_freepage_migratetype(struct page *page) */ static inline int put_page_testzero(struct page *page) { - VM_BUG_ON(atomic_read(&page->_count) == 0); + VM_BUG_ON_PAGE(atomic_read(&page->_count) == 0, page); return atomic_dec_and_test(&page->_count); } @@ -364,7 +365,7 @@ static inline int is_vmalloc_or_module_addr(const void *x) static inline void compound_lock(struct page *page) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE - VM_BUG_ON(PageSlab(page)); + VM_BUG_ON_PAGE(PageSlab(page), page); bit_spin_lock(PG_compound_lock, &page->flags); #endif } @@ -372,7 +373,7 @@ static inline void compound_lock(struct page *page) static inline void compound_unlock(struct page *page) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE - VM_BUG_ON(PageSlab(page)); + VM_BUG_ON_PAGE(PageSlab(page), page); bit_spin_unlock(PG_compound_lock, &page->flags); #endif } @@ -447,7 +448,7 @@ static inline bool __compound_tail_refcounted(struct page *page) */ static inline bool compound_tail_refcounted(struct page *page) { - VM_BUG_ON(!PageHead(page)); + VM_BUG_ON_PAGE(!PageHead(page), page); return __compound_tail_refcounted(page); } @@ -456,9 +457,9 @@ static inline void get_huge_page_tail(struct page *page) /* * 
__split_huge_page_refcount() cannot run from under us. */ - VM_BUG_ON(!PageTail(page)); - VM_BUG_ON(page_mapcount(page) < 0); - VM_BUG_ON(atomic_read(&page->_count) != 0); + VM_BUG_ON_PAGE(!PageTail(page), page); + VM_BUG_ON_PAGE(page_mapcount(page) < 0, page); + VM_BUG_ON_PAGE(atomic_read(&page->_count) != 0, page); if (compound_tail_refcounted(page->first_page)) atomic_inc(&page->_mapcount); } @@ -474,7 +475,7 @@ static inline void get_page(struct page *page) * Getting a normal page or the head of a compound page * requires to already have an elevated page->_count. */ - VM_BUG_ON(atomic_read(&page->_count) <= 0); + VM_BUG_ON_PAGE(atomic_read(&page->_count) <= 0, page); atomic_inc(&page->_count); } @@ -511,13 +512,13 @@ static inline int PageBuddy(struct page *page) static inline void __SetPageBuddy(struct page *page) { - VM_BUG_ON(atomic_read(&page->_mapcount) != -1); + VM_BUG_ON_PAGE(atomic_read(&page->_mapcount) != -1, page); atomic_set(&page->_mapcount, PAGE_BUDDY_MAPCOUNT_VALUE); } static inline void __ClearPageBuddy(struct page *page) { - VM_BUG_ON(!PageBuddy(page)); + VM_BUG_ON_PAGE(!PageBuddy(page), page); atomic_set(&page->_mapcount, -1); } @@ -1401,7 +1402,7 @@ static inline bool ptlock_init(struct page *page) * slab code uses page->slab_cache and page->first_page (for tail * pages), which share storage with page->ptl. 
*/ - VM_BUG_ON(*(unsigned long *)&page->ptl); + VM_BUG_ON_PAGE(*(unsigned long *)&page->ptl, page); if (!ptlock_alloc(page)) return false; spin_lock_init(ptlock_ptr(page)); @@ -1492,7 +1493,7 @@ static inline bool pgtable_pmd_page_ctor(struct page *page) static inline void pgtable_pmd_page_dtor(struct page *page) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE - VM_BUG_ON(page->pmd_huge_pte); + VM_BUG_ON_PAGE(page->pmd_huge_pte, page); #endif ptlock_free(page); } @@ -2029,10 +2030,6 @@ extern void shake_page(struct page *p, int access); extern atomic_long_t num_poisoned_pages; extern int soft_offline_page(struct page *page, int flags); -extern void dump_page(struct page *page, char *reason); -extern void dump_page_badflags(struct page *page, char *reason, - unsigned long badflags); - #if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS) extern void clear_huge_page(struct page *page, unsigned long addr, diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index 580bd587d916..5042c036dda9 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -1,10 +1,19 @@ #ifndef LINUX_MM_DEBUG_H #define LINUX_MM_DEBUG_H 1 +struct page; + +extern void dump_page(struct page *page, char *reason); +extern void dump_page_badflags(struct page *page, char *reason, + unsigned long badflags); + #ifdef CONFIG_DEBUG_VM #define VM_BUG_ON(cond) BUG_ON(cond) +#define VM_BUG_ON_PAGE(cond, page) \ + do { if (unlikely(cond)) { dump_page(page, NULL); BUG(); } } while (0) #else #define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond) +#define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond) #endif #ifdef CONFIG_DEBUG_VIRTUAL diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 98ada58f9942..e464b4e987e8 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -412,7 +412,7 @@ static inline void ClearPageCompound(struct page *page) */ static inline int PageTransHuge(struct page *page) { - VM_BUG_ON(PageTail(page)); + 
VM_BUG_ON_PAGE(PageTail(page), page); return PageHead(page); } @@ -460,25 +460,25 @@ static inline int PageTransTail(struct page *page) */ static inline int PageSlabPfmemalloc(struct page *page) { - VM_BUG_ON(!PageSlab(page)); + VM_BUG_ON_PAGE(!PageSlab(page), page); return PageActive(page); } static inline void SetPageSlabPfmemalloc(struct page *page) { - VM_BUG_ON(!PageSlab(page)); + VM_BUG_ON_PAGE(!PageSlab(page), page); SetPageActive(page); } static inline void __ClearPageSlabPfmemalloc(struct page *page) { - VM_BUG_ON(!PageSlab(page)); + VM_BUG_ON_PAGE(!PageSlab(page), page); __ClearPageActive(page); } static inline void ClearPageSlabPfmemalloc(struct page *page) { - VM_BUG_ON(!PageSlab(page)); + VM_BUG_ON_PAGE(!PageSlab(page), page); ClearPageActive(page); } diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index e3dea75a078b..1710d1b060ba 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -162,7 +162,7 @@ static inline int page_cache_get_speculative(struct page *page) * disabling preempt, and hence no need for the "speculative get" that * SMP requires. 
*/ - VM_BUG_ON(page_count(page) == 0); + VM_BUG_ON_PAGE(page_count(page) == 0, page); atomic_inc(&page->_count); #else @@ -175,7 +175,7 @@ static inline int page_cache_get_speculative(struct page *page) return 0; } #endif - VM_BUG_ON(PageTail(page)); + VM_BUG_ON_PAGE(PageTail(page), page); return 1; } @@ -191,14 +191,14 @@ static inline int page_cache_add_speculative(struct page *page, int count) # ifdef CONFIG_PREEMPT_COUNT VM_BUG_ON(!in_atomic()); # endif - VM_BUG_ON(page_count(page) == 0); + VM_BUG_ON_PAGE(page_count(page) == 0, page); atomic_add(count, &page->_count); #else if (unlikely(!atomic_add_unless(&page->_count, count, 0))) return 0; #endif - VM_BUG_ON(PageCompound(page) && page != compound_head(page)); + VM_BUG_ON_PAGE(PageCompound(page) && page != compound_head(page), page); return 1; } @@ -210,7 +210,7 @@ static inline int page_freeze_refs(struct page *page, int count) static inline void page_unfreeze_refs(struct page *page, int count) { - VM_BUG_ON(page_count(page) != 0); + VM_BUG_ON_PAGE(page_count(page) != 0, page); VM_BUG_ON(count == 0); atomic_set(&page->_count, count); diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 9e4761caa80c..e3817d2441b6 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -1,6 +1,7 @@ #ifndef __LINUX_PERCPU_H #define __LINUX_PERCPU_H +#include #include #include #include diff --git a/mm/cleancache.c b/mm/cleancache.c index 5875f48ce279..d0eac4350403 100644 --- a/mm/cleancache.c +++ b/mm/cleancache.c @@ -237,7 +237,7 @@ int __cleancache_get_page(struct page *page) goto out; } - VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); fake_pool_id = page->mapping->host->i_sb->cleancache_poolid; if (fake_pool_id < 0) goto out; @@ -279,7 +279,7 @@ void __cleancache_put_page(struct page *page) return; } - VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); fake_pool_id = page->mapping->host->i_sb->cleancache_poolid; if (fake_pool_id < 0) return; @@ -318,7 
+318,7 @@ void __cleancache_invalidate_page(struct address_space *mapping, if (pool_id < 0) return; - VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); if (cleancache_get_key(mapping->host, &key) >= 0) { cleancache_ops->invalidate_page(pool_id, key, page->index); diff --git a/mm/compaction.c b/mm/compaction.c index 3a91a2ea3d34..e0ab02d70f13 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -601,7 +601,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, if (__isolate_lru_page(page, mode) != 0) continue; - VM_BUG_ON(PageTransCompound(page)); + VM_BUG_ON_PAGE(PageTransCompound(page), page); /* Successfully isolated */ cc->finished_update_migrate = true; diff --git a/mm/filemap.c b/mm/filemap.c index b7749a92021c..7a7f3e0db738 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -409,9 +409,9 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) { int error; - VM_BUG_ON(!PageLocked(old)); - VM_BUG_ON(!PageLocked(new)); - VM_BUG_ON(new->mapping); + VM_BUG_ON_PAGE(!PageLocked(old), old); + VM_BUG_ON_PAGE(!PageLocked(new), new); + VM_BUG_ON_PAGE(new->mapping, new); error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM); if (!error) { @@ -461,8 +461,8 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping, { int error; - VM_BUG_ON(!PageLocked(page)); - VM_BUG_ON(PageSwapBacked(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); + VM_BUG_ON_PAGE(PageSwapBacked(page), page); error = mem_cgroup_cache_charge(page, current->mm, gfp_mask & GFP_RECLAIM_MASK); @@ -607,7 +607,7 @@ EXPORT_SYMBOL_GPL(add_page_wait_queue); */ void unlock_page(struct page *page) { - VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); clear_bit_unlock(PG_locked, &page->flags); smp_mb__after_clear_bit(); wake_up_page(page, PG_locked); @@ -760,7 +760,7 @@ repeat: page_cache_release(page); goto repeat; } - VM_BUG_ON(page->index != offset); + VM_BUG_ON_PAGE(page->index != offset, 
page); } return page; } @@ -1656,7 +1656,7 @@ retry_find: put_page(page); goto retry_find; } - VM_BUG_ON(page->index != offset); + VM_BUG_ON_PAGE(page->index != offset, page); /* * We have a locked page in the page cache, now we need to check diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 95d1acb0f3d2..25fab7150fa0 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -712,7 +712,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, pgtable_t pgtable; spinlock_t *ptl; - VM_BUG_ON(!PageCompound(page)); + VM_BUG_ON_PAGE(!PageCompound(page), page); pgtable = pte_alloc_one(mm, haddr); if (unlikely(!pgtable)) return VM_FAULT_OOM; @@ -893,7 +893,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, goto out; } src_page = pmd_page(pmd); - VM_BUG_ON(!PageHead(src_page)); + VM_BUG_ON_PAGE(!PageHead(src_page), src_page); get_page(src_page); page_dup_rmap(src_page); add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR); @@ -1067,7 +1067,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, ptl = pmd_lock(mm, pmd); if (unlikely(!pmd_same(*pmd, orig_pmd))) goto out_free_pages; - VM_BUG_ON(!PageHead(page)); + VM_BUG_ON_PAGE(!PageHead(page), page); pmdp_clear_flush(vma, haddr, pmd); /* leave pmd empty until pte is filled */ @@ -1133,7 +1133,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, goto out_unlock; page = pmd_page(orig_pmd); - VM_BUG_ON(!PageCompound(page) || !PageHead(page)); + VM_BUG_ON_PAGE(!PageCompound(page) || !PageHead(page), page); if (page_mapcount(page) == 1) { pmd_t entry; entry = pmd_mkyoung(orig_pmd); @@ -1211,7 +1211,7 @@ alloc: add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR); put_huge_zero_page(); } else { - VM_BUG_ON(!PageHead(page)); + VM_BUG_ON_PAGE(!PageHead(page), page); page_remove_rmap(page); put_page(page); } @@ -1249,7 +1249,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, goto out; page = pmd_page(*pmd); - VM_BUG_ON(!PageHead(page)); + 
VM_BUG_ON_PAGE(!PageHead(page), page); if (flags & FOLL_TOUCH) { pmd_t _pmd; /* @@ -1274,7 +1274,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, } } page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT; - VM_BUG_ON(!PageCompound(page)); + VM_BUG_ON_PAGE(!PageCompound(page), page); if (flags & FOLL_GET) get_page_foll(page); @@ -1432,9 +1432,9 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, } else { page = pmd_page(orig_pmd); page_remove_rmap(page); - VM_BUG_ON(page_mapcount(page) < 0); + VM_BUG_ON_PAGE(page_mapcount(page) < 0, page); add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR); - VM_BUG_ON(!PageHead(page)); + VM_BUG_ON_PAGE(!PageHead(page), page); atomic_long_dec(&tlb->mm->nr_ptes); spin_unlock(ptl); tlb_remove_page(tlb, page); @@ -2176,9 +2176,9 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, if (unlikely(!page)) goto out; - VM_BUG_ON(PageCompound(page)); - BUG_ON(!PageAnon(page)); - VM_BUG_ON(!PageSwapBacked(page)); + VM_BUG_ON_PAGE(PageCompound(page), page); + VM_BUG_ON_PAGE(!PageAnon(page), page); + VM_BUG_ON_PAGE(!PageSwapBacked(page), page); /* cannot use mapcount: can't collapse if there's a gup pin */ if (page_count(page) != 1) @@ -2201,8 +2201,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, } /* 0 stands for page_is_file_cache(page) == false */ inc_zone_page_state(page, NR_ISOLATED_ANON + 0); - VM_BUG_ON(!PageLocked(page)); - VM_BUG_ON(PageLRU(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); + VM_BUG_ON_PAGE(PageLRU(page), page); /* If there is no mapped pte young don't collapse the page */ if (pte_young(pteval) || PageReferenced(page) || @@ -2232,7 +2232,7 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page, } else { src_page = pte_page(pteval); copy_user_highpage(page, src_page, address, vma); - VM_BUG_ON(page_mapcount(src_page) != 1); + VM_BUG_ON_PAGE(page_mapcount(src_page) != 1, src_page); release_pte_page(src_page); /* * ptl mostly 
unnecessary, but preempt has to @@ -2311,7 +2311,7 @@ static struct page struct vm_area_struct *vma, unsigned long address, int node) { - VM_BUG_ON(*hpage); + VM_BUG_ON_PAGE(*hpage, *hpage); /* * Allocate the page while the vma is still valid and under * the mmap_sem read mode so there is no memory allocation @@ -2580,7 +2580,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, */ node = page_to_nid(page); khugepaged_node_load[node]++; - VM_BUG_ON(PageCompound(page)); + VM_BUG_ON_PAGE(PageCompound(page), page); if (!PageLRU(page) || PageLocked(page) || !PageAnon(page)) goto out_unmap; /* cannot use mapcount: can't collapse if there's a gup pin */ @@ -2876,7 +2876,7 @@ again: return; } page = pmd_page(*pmd); - VM_BUG_ON(!page_count(page)); + VM_BUG_ON_PAGE(!page_count(page), page); get_page(page); spin_unlock(ptl); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 04306b9de90d..c01cb9fedb18 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -584,7 +584,7 @@ static void update_and_free_page(struct hstate *h, struct page *page) 1 << PG_active | 1 << PG_reserved | 1 << PG_private | 1 << PG_writeback); } - VM_BUG_ON(hugetlb_cgroup_from_page(page)); + VM_BUG_ON_PAGE(hugetlb_cgroup_from_page(page), page); set_compound_page_dtor(page, NULL); set_page_refcounted(page); arch_release_hugepage(page); @@ -1089,7 +1089,7 @@ retry: * no users -- drop the buddy allocator's reference. 
*/ put_page_testzero(page); - VM_BUG_ON(page_count(page)); + VM_BUG_ON_PAGE(page_count(page), page); enqueue_huge_page(h, page); } free: @@ -3503,7 +3503,7 @@ int dequeue_hwpoisoned_huge_page(struct page *hpage) bool isolate_huge_page(struct page *page, struct list_head *list) { - VM_BUG_ON(!PageHead(page)); + VM_BUG_ON_PAGE(!PageHead(page), page); if (!get_page_unless_zero(page)) return false; spin_lock(&hugetlb_lock); @@ -3514,7 +3514,7 @@ bool isolate_huge_page(struct page *page, struct list_head *list) void putback_active_hugepage(struct page *page) { - VM_BUG_ON(!PageHead(page)); + VM_BUG_ON_PAGE(!PageHead(page), page); spin_lock(&hugetlb_lock); list_move_tail(&page->lru, &(page_hstate(page))->hugepage_activelist); spin_unlock(&hugetlb_lock); @@ -3523,7 +3523,7 @@ void putback_active_hugepage(struct page *page) bool is_hugepage_active(struct page *page) { - VM_BUG_ON(!PageHuge(page)); + VM_BUG_ON_PAGE(!PageHuge(page), page); /* * This function can be called for a tail page because the caller, * scan_movable_pages, scans through a given pfn-range which typically diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index d747a84e09b0..cb00829bb466 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -390,7 +390,7 @@ void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage) if (hugetlb_cgroup_disabled()) return; - VM_BUG_ON(!PageHuge(oldhpage)); + VM_BUG_ON_PAGE(!PageHuge(oldhpage), oldhpage); spin_lock(&hugetlb_lock); h_cg = hugetlb_cgroup_from_page(oldhpage); set_hugetlb_cgroup(oldhpage, NULL); diff --git a/mm/internal.h b/mm/internal.h index a346ba120e42..dc95e979ae56 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -27,8 +27,8 @@ static inline void set_page_count(struct page *page, int v) */ static inline void set_page_refcounted(struct page *page) { - VM_BUG_ON(PageTail(page)); - VM_BUG_ON(atomic_read(&page->_count)); + VM_BUG_ON_PAGE(PageTail(page), page); + VM_BUG_ON_PAGE(atomic_read(&page->_count), page); set_page_count(page, 
1); } @@ -46,7 +46,7 @@ static inline void __get_page_tail_foll(struct page *page, * speculative page access (like in * page_cache_get_speculative()) on tail pages. */ - VM_BUG_ON(atomic_read(&page->first_page->_count) <= 0); + VM_BUG_ON_PAGE(atomic_read(&page->first_page->_count) <= 0, page); if (get_page_head) atomic_inc(&page->first_page->_count); get_huge_page_tail(page); @@ -71,7 +71,7 @@ static inline void get_page_foll(struct page *page) * Getting a normal page or the head of a compound page * requires to already have an elevated page->_count. */ - VM_BUG_ON(atomic_read(&page->_count) <= 0); + VM_BUG_ON_PAGE(atomic_read(&page->_count) <= 0, page); atomic_inc(&page->_count); } } @@ -173,7 +173,7 @@ static inline void munlock_vma_pages_all(struct vm_area_struct *vma) static inline int mlocked_vma_newpage(struct vm_area_struct *vma, struct page *page) { - VM_BUG_ON(PageLRU(page)); + VM_BUG_ON_PAGE(PageLRU(page), page); if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) return 0; diff --git a/mm/ksm.c b/mm/ksm.c index 3df141e5f3e0..f91ddf5c3688 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -1898,13 +1898,13 @@ int rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc) int ret = SWAP_AGAIN; int search_new_forks = 0; - VM_BUG_ON(!PageKsm(page)); + VM_BUG_ON_PAGE(!PageKsm(page), page); /* * Rely on the page lock to protect against concurrent modifications * to that page's node of the stable tree. 
*/ - VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); stable_node = page_stable_node(page); if (!stable_node) @@ -1958,13 +1958,13 @@ void ksm_migrate_page(struct page *newpage, struct page *oldpage) { struct stable_node *stable_node; - VM_BUG_ON(!PageLocked(oldpage)); - VM_BUG_ON(!PageLocked(newpage)); - VM_BUG_ON(newpage->mapping != oldpage->mapping); + VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage); + VM_BUG_ON_PAGE(!PageLocked(newpage), newpage); + VM_BUG_ON_PAGE(newpage->mapping != oldpage->mapping, newpage); stable_node = page_stable_node(newpage); if (stable_node) { - VM_BUG_ON(stable_node->kpfn != page_to_pfn(oldpage)); + VM_BUG_ON_PAGE(stable_node->kpfn != page_to_pfn(oldpage), oldpage); stable_node->kpfn = page_to_pfn(newpage); /* * newpage->mapping was set in advance; now we need smp_wmb() diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 7890ce9d6bd1..72f2d90e7ef6 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2897,7 +2897,7 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) unsigned short id; swp_entry_t ent; - VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); pc = lookup_page_cgroup(page); lock_page_cgroup(pc); @@ -2931,7 +2931,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, bool anon; lock_page_cgroup(pc); - VM_BUG_ON(PageCgroupUsed(pc)); + VM_BUG_ON_PAGE(PageCgroupUsed(pc), page); /* * we don't need page_cgroup_lock about tail pages, becase they are not * accessed by any other context at this point. 
@@ -2966,7 +2966,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, if (lrucare) { if (was_on_lru) { lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup); - VM_BUG_ON(PageLRU(page)); + VM_BUG_ON_PAGE(PageLRU(page), page); SetPageLRU(page); add_page_to_lru_list(page, lruvec, page_lru(page)); } @@ -3780,7 +3780,7 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order) if (!memcg) return; - VM_BUG_ON(mem_cgroup_is_root(memcg)); + VM_BUG_ON_PAGE(mem_cgroup_is_root(memcg), page); memcg_uncharge_kmem(memcg, PAGE_SIZE << order); } #else @@ -3859,7 +3859,7 @@ static int mem_cgroup_move_account(struct page *page, bool anon = PageAnon(page); VM_BUG_ON(from == to); - VM_BUG_ON(PageLRU(page)); + VM_BUG_ON_PAGE(PageLRU(page), page); /* * The page is isolated from LRU. So, collapse function * will not handle this page. But page splitting can happen. @@ -3952,7 +3952,7 @@ static int mem_cgroup_move_parent(struct page *page, parent = root_mem_cgroup; if (nr_pages > 1) { - VM_BUG_ON(!PageTransHuge(page)); + VM_BUG_ON_PAGE(!PageTransHuge(page), page); flags = compound_lock_irqsave(page); } @@ -3986,7 +3986,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, if (PageTransHuge(page)) { nr_pages <<= compound_order(page); - VM_BUG_ON(!PageTransHuge(page)); + VM_BUG_ON_PAGE(!PageTransHuge(page), page); /* * Never OOM-kill a process for a huge page. The * fault handler will fall back to regular pages. 
@@ -4006,8 +4006,8 @@ int mem_cgroup_newpage_charge(struct page *page, { if (mem_cgroup_disabled()) return 0; - VM_BUG_ON(page_mapped(page)); - VM_BUG_ON(page->mapping && !PageAnon(page)); + VM_BUG_ON_PAGE(page_mapped(page), page); + VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page); VM_BUG_ON(!mm); return mem_cgroup_charge_common(page, mm, gfp_mask, MEM_CGROUP_CHARGE_TYPE_ANON); @@ -4211,7 +4211,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype, if (PageTransHuge(page)) { nr_pages <<= compound_order(page); - VM_BUG_ON(!PageTransHuge(page)); + VM_BUG_ON_PAGE(!PageTransHuge(page), page); } /* * Check if our page_cgroup is valid @@ -4303,7 +4303,7 @@ void mem_cgroup_uncharge_page(struct page *page) /* early check. */ if (page_mapped(page)) return; - VM_BUG_ON(page->mapping && !PageAnon(page)); + VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page); /* * If the page is in swap cache, uncharge should be deferred * to the swap path, which also properly accounts swap usage @@ -4323,8 +4323,8 @@ void mem_cgroup_uncharge_page(struct page *page) void mem_cgroup_uncharge_cache_page(struct page *page) { - VM_BUG_ON(page_mapped(page)); - VM_BUG_ON(page->mapping); + VM_BUG_ON_PAGE(page_mapped(page), page); + VM_BUG_ON_PAGE(page->mapping, page); __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE, false); } @@ -6880,7 +6880,7 @@ static enum mc_target_type get_mctgt_type_thp(struct vm_area_struct *vma, enum mc_target_type ret = MC_TARGET_NONE; page = pmd_page(pmd); - VM_BUG_ON(!page || !PageHead(page)); + VM_BUG_ON_PAGE(!page || !PageHead(page), page); if (!move_anon()) return ret; pc = lookup_page_cgroup(page); diff --git a/mm/memory.c b/mm/memory.c index 71d70c082b98..be6a0c0d4ae0 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -289,7 +289,7 @@ int __tlb_remove_page(struct mmu_gather *tlb, struct page *page) return 0; batch = tlb->active; } - VM_BUG_ON(batch->nr > batch->max); + VM_BUG_ON_PAGE(batch->nr > batch->max, page); return 
batch->max - batch->nr; } @@ -2702,7 +2702,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, goto unwritable_page; } } else - VM_BUG_ON(!PageLocked(old_page)); + VM_BUG_ON_PAGE(!PageLocked(old_page), old_page); /* * Since we dropped the lock we need to revalidate @@ -3358,7 +3358,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (unlikely(!(ret & VM_FAULT_LOCKED))) lock_page(vmf.page); else - VM_BUG_ON(!PageLocked(vmf.page)); + VM_BUG_ON_PAGE(!PageLocked(vmf.page), vmf.page); /* * Should we do an early C-O-W break? @@ -3395,7 +3395,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, goto unwritable_page; } } else - VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); page_mkwrite = 1; } } diff --git a/mm/migrate.c b/mm/migrate.c index a8025befc323..4b3996eb7f0f 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -499,7 +499,7 @@ void migrate_page_copy(struct page *newpage, struct page *page) if (PageUptodate(page)) SetPageUptodate(newpage); if (TestClearPageActive(page)) { - VM_BUG_ON(PageUnevictable(page)); + VM_BUG_ON_PAGE(PageUnevictable(page), page); SetPageActive(newpage); } else if (TestClearPageUnevictable(page)) SetPageUnevictable(newpage); @@ -871,7 +871,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage, * free the metadata, so the page can be freed. 
*/ if (!page->mapping) { - VM_BUG_ON(PageAnon(page)); + VM_BUG_ON_PAGE(PageAnon(page), page); if (page_has_private(page)) { try_to_free_buffers(page); goto uncharge; @@ -1618,7 +1618,7 @@ static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page) { int page_lru; - VM_BUG_ON(compound_order(page) && !PageTransHuge(page)); + VM_BUG_ON_PAGE(compound_order(page) && !PageTransHuge(page), page); /* Avoid migrating to a node that is nearly full */ if (!migrate_balanced_pgdat(pgdat, 1UL << compound_order(page))) diff --git a/mm/mlock.c b/mm/mlock.c index b30adbe62034..4e1a68162285 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -279,8 +279,8 @@ static int __mlock_posix_error_return(long retval) static bool __putback_lru_fast_prepare(struct page *page, struct pagevec *pvec, int *pgrescued) { - VM_BUG_ON(PageLRU(page)); - VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON_PAGE(PageLRU(page), page); + VM_BUG_ON_PAGE(!PageLocked(page), page); if (page_mapcount(page) <= 1 && page_evictable(page)) { pagevec_add(pvec, page); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1939f4446a36..f18f016cca80 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -509,12 +509,12 @@ static inline int page_is_buddy(struct page *page, struct page *buddy, return 0; if (page_is_guard(buddy) && page_order(buddy) == order) { - VM_BUG_ON(page_count(buddy) != 0); + VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy); return 1; } if (PageBuddy(buddy) && page_order(buddy) == order) { - VM_BUG_ON(page_count(buddy) != 0); + VM_BUG_ON_PAGE(page_count(buddy) != 0, buddy); return 1; } return 0; @@ -564,8 +564,8 @@ static inline void __free_one_page(struct page *page, page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1); - VM_BUG_ON(page_idx & ((1 << order) - 1)); - VM_BUG_ON(bad_range(zone, page)); + VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page); + VM_BUG_ON_PAGE(bad_range(zone, page), page); while (order < MAX_ORDER-1) { buddy_idx = __find_buddy_index(page_idx, order); @@ -827,7 +827,7 @@ static 
inline void expand(struct zone *zone, struct page *page, area--; high--; size >>= 1; - VM_BUG_ON(bad_range(zone, &page[size])); + VM_BUG_ON_PAGE(bad_range(zone, &page[size]), &page[size]); #ifdef CONFIG_DEBUG_PAGEALLOC if (high < debug_guardpage_minorder()) { @@ -980,7 +980,7 @@ int move_freepages(struct zone *zone, for (page = start_page; page <= end_page;) { /* Make sure we are not inadvertently changing nodes */ - VM_BUG_ON(page_to_nid(page) != zone_to_nid(zone)); + VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page); if (!pfn_valid_within(page_to_pfn(page))) { page++; @@ -1429,8 +1429,8 @@ void split_page(struct page *page, unsigned int order) { int i; - VM_BUG_ON(PageCompound(page)); - VM_BUG_ON(!page_count(page)); + VM_BUG_ON_PAGE(PageCompound(page), page); + VM_BUG_ON_PAGE(!page_count(page), page); #ifdef CONFIG_KMEMCHECK /* @@ -1577,7 +1577,7 @@ again: zone_statistics(preferred_zone, zone, gfp_flags); local_irq_restore(flags); - VM_BUG_ON(bad_range(zone, page)); + VM_BUG_ON_PAGE(bad_range(zone, page), page); if (prep_new_page(page, order, gfp_flags)) goto again; return page; @@ -6021,7 +6021,7 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags, pfn = page_to_pfn(page); bitmap = get_pageblock_bitmap(zone, pfn); bitidx = pfn_to_bitidx(zone, pfn); - VM_BUG_ON(!zone_spans_pfn(zone, pfn)); + VM_BUG_ON_PAGE(!zone_spans_pfn(zone, pfn), page); for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1) if (flags & value) @@ -6539,3 +6539,4 @@ void dump_page(struct page *page, char *reason) { dump_page_badflags(page, reason, 0); } +EXPORT_SYMBOL_GPL(dump_page); diff --git a/mm/page_io.c b/mm/page_io.c index 8c79a4764be0..7247be6114ac 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -320,8 +320,8 @@ int swap_readpage(struct page *page) int ret = 0; struct swap_info_struct *sis = page_swap_info(page); - VM_BUG_ON(!PageLocked(page)); - VM_BUG_ON(PageUptodate(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); + 
VM_BUG_ON_PAGE(PageUptodate(page), page); if (frontswap_load(page) == 0) { SetPageUptodate(page); unlock_page(page); diff --git a/mm/rmap.c b/mm/rmap.c index 962e2a1e13a0..2dcd3353c3f6 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -894,9 +894,9 @@ void page_move_anon_rmap(struct page *page, { struct anon_vma *anon_vma = vma->anon_vma; - VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON(!anon_vma); - VM_BUG_ON(page->index != linear_page_index(vma, address)); + VM_BUG_ON_PAGE(page->index != linear_page_index(vma, address), page); anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON; page->mapping = (struct address_space *) anon_vma; @@ -995,7 +995,7 @@ void do_page_add_anon_rmap(struct page *page, if (unlikely(PageKsm(page))) return; - VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); /* address might be in next vma when migration races vma_adjust */ if (first) __page_set_anon_rmap(page, vma, address, exclusive); @@ -1481,7 +1481,7 @@ int try_to_unmap(struct page *page, enum ttu_flags flags) .anon_lock = page_lock_anon_vma_read, }; - VM_BUG_ON(!PageHuge(page) && PageTransHuge(page)); + VM_BUG_ON_PAGE(!PageHuge(page) && PageTransHuge(page), page); /* * During exec, a temporary VMA is setup and later moved. 
@@ -1533,7 +1533,7 @@ int try_to_munlock(struct page *page) }; - VM_BUG_ON(!PageLocked(page) || PageLRU(page)); + VM_BUG_ON_PAGE(!PageLocked(page) || PageLRU(page), page); ret = rmap_walk(page, &rwc); return ret; diff --git a/mm/shmem.c b/mm/shmem.c index 902a14842b74..8156f95ec0cf 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -285,8 +285,8 @@ static int shmem_add_to_page_cache(struct page *page, { int error; - VM_BUG_ON(!PageLocked(page)); - VM_BUG_ON(!PageSwapBacked(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); + VM_BUG_ON_PAGE(!PageSwapBacked(page), page); page_cache_get(page); page->mapping = mapping; @@ -491,7 +491,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, continue; if (!unfalloc || !PageUptodate(page)) { if (page->mapping == mapping) { - VM_BUG_ON(PageWriteback(page)); + VM_BUG_ON_PAGE(PageWriteback(page), page); truncate_inode_page(mapping, page); } } @@ -568,7 +568,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, lock_page(page); if (!unfalloc || !PageUptodate(page)) { if (page->mapping == mapping) { - VM_BUG_ON(PageWriteback(page)); + VM_BUG_ON_PAGE(PageWriteback(page), page); truncate_inode_page(mapping, page); } } diff --git a/mm/slub.c b/mm/slub.c index 545a170ebf9f..34bb8c65a2d8 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1559,7 +1559,7 @@ static inline void *acquire_slab(struct kmem_cache *s, new.freelist = freelist; } - VM_BUG_ON(new.frozen); + VM_BUG_ON_PAGE(new.frozen, &new); new.frozen = 1; if (!__cmpxchg_double_slab(s, page, @@ -1812,7 +1812,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page, set_freepointer(s, freelist, prior); new.counters = counters; new.inuse--; - VM_BUG_ON(!new.frozen); + VM_BUG_ON_PAGE(!new.frozen, &new); } while (!__cmpxchg_double_slab(s, page, prior, counters, @@ -1840,7 +1840,7 @@ redo: old.freelist = page->freelist; old.counters = page->counters; - VM_BUG_ON(!old.frozen); + VM_BUG_ON_PAGE(!old.frozen, &old); /* Determine 
target state of the slab */ new.counters = old.counters; @@ -1952,7 +1952,7 @@ static void unfreeze_partials(struct kmem_cache *s, old.freelist = page->freelist; old.counters = page->counters; - VM_BUG_ON(!old.frozen); + VM_BUG_ON_PAGE(!old.frozen, &old); new.counters = old.counters; new.freelist = old.freelist; @@ -2225,7 +2225,7 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page) counters = page->counters; new.counters = counters; - VM_BUG_ON(!new.frozen); + VM_BUG_ON_PAGE(!new.frozen, &new); new.inuse = page->objects; new.frozen = freelist != NULL; @@ -2319,7 +2319,7 @@ load_freelist: * page is pointing to the page from which the objects are obtained. * That page must be frozen for per cpu allocations to work. */ - VM_BUG_ON(!c->page->frozen); + VM_BUG_ON_PAGE(!c->page->frozen, c->page); c->freelist = get_freepointer(s, freelist); c->tid = next_tid(c->tid); local_irq_restore(flags); diff --git a/mm/swap.c b/mm/swap.c index d1100b619e61..b31ba67d440a 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -57,7 +57,7 @@ static void __page_cache_release(struct page *page) spin_lock_irqsave(&zone->lru_lock, flags); lruvec = mem_cgroup_page_lruvec(page, zone); - VM_BUG_ON(!PageLRU(page)); + VM_BUG_ON_PAGE(!PageLRU(page), page); __ClearPageLRU(page); del_page_from_lru_list(page, lruvec, page_off_lru(page)); spin_unlock_irqrestore(&zone->lru_lock, flags); @@ -130,8 +130,8 @@ static void put_compound_page(struct page *page) * __split_huge_page_refcount cannot race * here. */ - VM_BUG_ON(!PageHead(page_head)); - VM_BUG_ON(page_mapcount(page) != 0); + VM_BUG_ON_PAGE(!PageHead(page_head), page_head); + VM_BUG_ON_PAGE(page_mapcount(page) != 0, page); if (put_page_testzero(page_head)) { /* * If this is the tail of a slab @@ -148,7 +148,7 @@ static void put_compound_page(struct page *page) * the compound page enters the buddy * allocator. 
*/ - VM_BUG_ON(PageSlab(page_head)); + VM_BUG_ON_PAGE(PageSlab(page_head), page_head); __put_compound_page(page_head); } return; @@ -199,7 +199,7 @@ out_put_single: __put_single_page(page); return; } - VM_BUG_ON(page_head != page->first_page); + VM_BUG_ON_PAGE(page_head != page->first_page, page); /* * We can release the refcount taken by * get_page_unless_zero() now that @@ -207,12 +207,12 @@ out_put_single: * compound_lock. */ if (put_page_testzero(page_head)) - VM_BUG_ON(1); + VM_BUG_ON_PAGE(1, page_head); /* __split_huge_page_refcount will wait now */ - VM_BUG_ON(page_mapcount(page) <= 0); + VM_BUG_ON_PAGE(page_mapcount(page) <= 0, page); atomic_dec(&page->_mapcount); - VM_BUG_ON(atomic_read(&page_head->_count) <= 0); - VM_BUG_ON(atomic_read(&page->_count) != 0); + VM_BUG_ON_PAGE(atomic_read(&page_head->_count) <= 0, page_head); + VM_BUG_ON_PAGE(atomic_read(&page->_count) != 0, page); compound_unlock_irqrestore(page_head, flags); if (put_page_testzero(page_head)) { @@ -223,7 +223,7 @@ out_put_single: } } else { /* page_head is a dangling pointer */ - VM_BUG_ON(PageTail(page)); + VM_BUG_ON_PAGE(PageTail(page), page); goto out_put_single; } } @@ -264,7 +264,7 @@ bool __get_page_tail(struct page *page) * page. __split_huge_page_refcount * cannot race here. 
*/ - VM_BUG_ON(!PageHead(page_head)); + VM_BUG_ON_PAGE(!PageHead(page_head), page_head); __get_page_tail_foll(page, true); return true; } else { @@ -604,8 +604,8 @@ EXPORT_SYMBOL(__lru_cache_add); */ void lru_cache_add(struct page *page) { - VM_BUG_ON(PageActive(page) && PageUnevictable(page)); - VM_BUG_ON(PageLRU(page)); + VM_BUG_ON_PAGE(PageActive(page) && PageUnevictable(page), page); + VM_BUG_ON_PAGE(PageLRU(page), page); __lru_cache_add(page); } @@ -846,7 +846,7 @@ void release_pages(struct page **pages, int nr, int cold) } lruvec = mem_cgroup_page_lruvec(page, zone); - VM_BUG_ON(!PageLRU(page)); + VM_BUG_ON_PAGE(!PageLRU(page), page); __ClearPageLRU(page); del_page_from_lru_list(page, lruvec, page_off_lru(page)); } @@ -888,9 +888,9 @@ void lru_add_page_tail(struct page *page, struct page *page_tail, { const int file = 0; - VM_BUG_ON(!PageHead(page)); - VM_BUG_ON(PageCompound(page_tail)); - VM_BUG_ON(PageLRU(page_tail)); + VM_BUG_ON_PAGE(!PageHead(page), page); + VM_BUG_ON_PAGE(PageCompound(page_tail), page); + VM_BUG_ON_PAGE(PageLRU(page_tail), page); VM_BUG_ON(NR_CPUS != 1 && !spin_is_locked(&lruvec_zone(lruvec)->lru_lock)); @@ -929,7 +929,7 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec, int active = PageActive(page); enum lru_list lru = page_lru(page); - VM_BUG_ON(PageLRU(page)); + VM_BUG_ON_PAGE(PageLRU(page), page); SetPageLRU(page); add_page_to_lru_list(page, lruvec, lru); diff --git a/mm/swap_state.c b/mm/swap_state.c index e6f15f8ca2af..98e85e9c2b2d 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -83,9 +83,9 @@ int __add_to_swap_cache(struct page *page, swp_entry_t entry) int error; struct address_space *address_space; - VM_BUG_ON(!PageLocked(page)); - VM_BUG_ON(PageSwapCache(page)); - VM_BUG_ON(!PageSwapBacked(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); + VM_BUG_ON_PAGE(PageSwapCache(page), page); + VM_BUG_ON_PAGE(!PageSwapBacked(page), page); page_cache_get(page); SetPageSwapCache(page); @@ -139,9 +139,9 @@ 
void __delete_from_swap_cache(struct page *page) swp_entry_t entry; struct address_space *address_space; - VM_BUG_ON(!PageLocked(page)); - VM_BUG_ON(!PageSwapCache(page)); - VM_BUG_ON(PageWriteback(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); + VM_BUG_ON_PAGE(!PageSwapCache(page), page); + VM_BUG_ON_PAGE(PageWriteback(page), page); entry.val = page_private(page); address_space = swap_address_space(entry); @@ -165,8 +165,8 @@ int add_to_swap(struct page *page, struct list_head *list) swp_entry_t entry; int err; - VM_BUG_ON(!PageLocked(page)); - VM_BUG_ON(!PageUptodate(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); + VM_BUG_ON_PAGE(!PageUptodate(page), page); entry = get_swap_page(); if (!entry.val) diff --git a/mm/swapfile.c b/mm/swapfile.c index 612a7c9795f6..d443dea95c27 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -906,7 +906,7 @@ int reuse_swap_page(struct page *page) { int count; - VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); if (unlikely(PageKsm(page))) return 0; count = page_mapcount(page); @@ -926,7 +926,7 @@ int reuse_swap_page(struct page *page) */ int try_to_free_swap(struct page *page) { - VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON_PAGE(!PageLocked(page), page); if (!PageSwapCache(page)) return 0; @@ -2714,7 +2714,7 @@ struct swap_info_struct *page_swap_info(struct page *page) */ struct address_space *__page_file_mapping(struct page *page) { - VM_BUG_ON(!PageSwapCache(page)); + VM_BUG_ON_PAGE(!PageSwapCache(page), page); return page_swap_info(page)->swap_file->f_mapping; } EXPORT_SYMBOL_GPL(__page_file_mapping); @@ -2722,7 +2722,7 @@ EXPORT_SYMBOL_GPL(__page_file_mapping); pgoff_t __page_file_index(struct page *page) { swp_entry_t swap = { .val = page_private(page) }; - VM_BUG_ON(!PageSwapCache(page)); + VM_BUG_ON_PAGE(!PageSwapCache(page), page); return swp_offset(swap); } EXPORT_SYMBOL_GPL(__page_file_index); diff --git a/mm/vmscan.c b/mm/vmscan.c index eea668d9cff6..2254f36b74b8 100644 --- a/mm/vmscan.c +++ 
b/mm/vmscan.c @@ -603,7 +603,7 @@ void putback_lru_page(struct page *page) bool is_unevictable; int was_unevictable = PageUnevictable(page); - VM_BUG_ON(PageLRU(page)); + VM_BUG_ON_PAGE(PageLRU(page), page); redo: ClearPageUnevictable(page); @@ -794,8 +794,8 @@ static unsigned long shrink_page_list(struct list_head *page_list, if (!trylock_page(page)) goto keep; - VM_BUG_ON(PageActive(page)); - VM_BUG_ON(page_zone(page) != zone); + VM_BUG_ON_PAGE(PageActive(page), page); + VM_BUG_ON_PAGE(page_zone(page) != zone, page); sc->nr_scanned++; @@ -1079,14 +1079,14 @@ activate_locked: /* Not a candidate for swapping, so reclaim swap space. */ if (PageSwapCache(page) && vm_swap_full()) try_to_free_swap(page); - VM_BUG_ON(PageActive(page)); + VM_BUG_ON_PAGE(PageActive(page), page); SetPageActive(page); pgactivate++; keep_locked: unlock_page(page); keep: list_add(&page->lru, &ret_pages); - VM_BUG_ON(PageLRU(page) || PageUnevictable(page)); + VM_BUG_ON_PAGE(PageLRU(page) || PageUnevictable(page), page); } free_hot_cold_page_list(&free_pages, 1); @@ -1240,7 +1240,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, page = lru_to_page(src); prefetchw_prev_lru_page(page, src, flags); - VM_BUG_ON(!PageLRU(page)); + VM_BUG_ON_PAGE(!PageLRU(page), page); switch (__isolate_lru_page(page, mode)) { case 0: @@ -1295,7 +1295,7 @@ int isolate_lru_page(struct page *page) { int ret = -EBUSY; - VM_BUG_ON(!page_count(page)); + VM_BUG_ON_PAGE(!page_count(page), page); if (PageLRU(page)) { struct zone *zone = page_zone(page); @@ -1366,7 +1366,7 @@ putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list) struct page *page = lru_to_page(page_list); int lru; - VM_BUG_ON(PageLRU(page)); + VM_BUG_ON_PAGE(PageLRU(page), page); list_del(&page->lru); if (unlikely(!page_evictable(page))) { spin_unlock_irq(&zone->lru_lock); @@ -1586,7 +1586,7 @@ static void move_active_pages_to_lru(struct lruvec *lruvec, page = lru_to_page(list); lruvec = mem_cgroup_page_lruvec(page, 
zone); - VM_BUG_ON(PageLRU(page)); + VM_BUG_ON_PAGE(PageLRU(page), page); SetPageLRU(page); nr_pages = hpage_nr_pages(page); @@ -3701,7 +3701,7 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages) if (page_evictable(page)) { enum lru_list lru = page_lru_base_type(page); - VM_BUG_ON(PageActive(page)); + VM_BUG_ON_PAGE(PageActive(page), page); ClearPageUnevictable(page); del_page_from_lru_list(page, lruvec, LRU_UNEVICTABLE); add_page_to_lru_list(page, lruvec, lru); -- cgit v1.2.3 From 363a044f739b0f07a8c063b838c5528d10720e02 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 23 Jan 2014 15:52:56 -0800 Subject: memcg, slab: kmem_cache_create_memcg(): fix memleak on fail path We do not free the cache's memcg_params if __kmem_cache_create fails. Fix this. Plus, rename memcg_register_cache() to memcg_alloc_cache_params(), because it actually does not register the cache anywhere, but simply initialize kmem_cache::memcg_params. [akpm@linux-foundation.org: fix build] Signed-off-by: Vladimir Davydov Cc: Michal Hocko Cc: Glauber Costa Cc: Johannes Weiner Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki Cc: Pekka Enberg Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 14 +++++++++----- mm/memcontrol.c | 11 ++++++++--- mm/slab_common.c | 3 ++- 3 files changed, 19 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index b3e7a667e03c..284daff507fb 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -497,8 +497,9 @@ void __memcg_kmem_commit_charge(struct page *page, void __memcg_kmem_uncharge_pages(struct page *page, int order); int memcg_cache_id(struct mem_cgroup *memcg); -int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s, - struct kmem_cache *root_cache); +int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s, + struct kmem_cache *root_cache); +void 
memcg_free_cache_params(struct kmem_cache *s); void memcg_release_cache(struct kmem_cache *cachep); void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep); @@ -640,13 +641,16 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg) return -1; } -static inline int -memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s, - struct kmem_cache *root_cache) +static inline int memcg_alloc_cache_params(struct mem_cgroup *memcg, + struct kmem_cache *s, struct kmem_cache *root_cache) { return 0; } +static inline void memcg_free_cache_params(struct kmem_cache *s) +{ +} + static inline void memcg_release_cache(struct kmem_cache *cachep) { } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 72f2d90e7ef6..b8ebe71f872d 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3231,8 +3231,8 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups) return 0; } -int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s, - struct kmem_cache *root_cache) +int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s, + struct kmem_cache *root_cache) { size_t size; @@ -3260,6 +3260,11 @@ int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s, return 0; } +void memcg_free_cache_params(struct kmem_cache *s) +{ + kfree(s->memcg_params); +} + void memcg_release_cache(struct kmem_cache *s) { struct kmem_cache *root; @@ -3288,7 +3293,7 @@ void memcg_release_cache(struct kmem_cache *s) css_put(&memcg->css); out: - kfree(s->memcg_params); + memcg_free_cache_params(s); } /* diff --git a/mm/slab_common.c b/mm/slab_common.c index f70df3ef6f1a..70f9e249ac30 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -205,7 +205,7 @@ kmem_cache_create_memcg(struct mem_cgroup *memcg, const char *name, size_t size, if (!s->name) goto out_free_cache; - err = memcg_register_cache(memcg, s, parent_cache); + err = memcg_alloc_cache_params(memcg, s, parent_cache); if (err) goto out_free_cache; @@ -235,6 +235,7 
@@ out_unlock: return s; out_free_cache: + memcg_free_cache_params(s); kfree(s->name); kmem_cache_free(kmem_cache, s); goto out_unlock; -- cgit v1.2.3 From 1aa13254259bdef0bca723849ab3bab308d2f0c3 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 23 Jan 2014 15:52:58 -0800 Subject: memcg, slab: clean up memcg cache initialization/destruction Currently, we have rather a messy function set relating to per-memcg kmem cache initialization/destruction. Per-memcg caches are created in memcg_create_kmem_cache(). This function calls kmem_cache_create_memcg() to allocate and initialize a kmem cache and then "registers" the new cache in the memcg_params::memcg_caches array of the parent cache. During its work-flow, kmem_cache_create_memcg() executes the following memcg-related functions: - memcg_alloc_cache_params(), to initialize memcg_params of the newly created cache; - memcg_cache_list_add(), to add the new cache to the memcg_slab_caches list. On the other hand, kmem_cache_destroy() called on a cache destruction only calls memcg_release_cache(), which does all the work: it cleans the reference to the cache in its parent's memcg_params::memcg_caches, removes the cache from the memcg_slab_caches list, and frees memcg_params. Such an inconsistency between destruction and initialization paths makes the code difficult to read, so let's clean this up a bit. This patch moves all the code relating to registration of per-memcg caches (adding to memcg list, setting the pointer to a cache from its parent) to the newly created memcg_register_cache() and memcg_unregister_cache() functions making the initialization and destruction paths look symmetrical.
Signed-off-by: Vladimir Davydov Cc: Michal Hocko Cc: Glauber Costa Cc: Johannes Weiner Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki Cc: Pekka Enberg Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 9 +++---- mm/memcontrol.c | 64 ++++++++++++++++++++++------------------------ mm/slab_common.c | 5 ++-- 3 files changed, 37 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 284daff507fb..abd0113b6620 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -500,8 +500,8 @@ int memcg_cache_id(struct mem_cgroup *memcg); int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s, struct kmem_cache *root_cache); void memcg_free_cache_params(struct kmem_cache *s); -void memcg_release_cache(struct kmem_cache *cachep); -void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep); +void memcg_register_cache(struct kmem_cache *s); +void memcg_unregister_cache(struct kmem_cache *s); int memcg_update_cache_size(struct kmem_cache *s, int num_groups); void memcg_update_array_size(int num_groups); @@ -651,12 +651,11 @@ static inline void memcg_free_cache_params(struct kmem_cache *s) { } -static inline void memcg_release_cache(struct kmem_cache *cachep) +static inline void memcg_register_cache(struct kmem_cache *s) { } -static inline void memcg_cache_list_add(struct mem_cgroup *memcg, - struct kmem_cache *s) +static inline void memcg_unregister_cache(struct kmem_cache *s) { } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index b8ebe71f872d..739383cd3f70 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3095,16 +3095,6 @@ static void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size) css_put(&memcg->css); } -void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep) -{ - if (!memcg) - return; - - mutex_lock(&memcg->slab_caches_mutex); - 
list_add(&cachep->memcg_params->list, &memcg->memcg_slab_caches); - mutex_unlock(&memcg->slab_caches_mutex); -} - /* * helper for acessing a memcg's index. It will be used as an index in the * child cache array in kmem_cache, and also to derive its name. This function @@ -3265,21 +3255,41 @@ void memcg_free_cache_params(struct kmem_cache *s) kfree(s->memcg_params); } -void memcg_release_cache(struct kmem_cache *s) +void memcg_register_cache(struct kmem_cache *s) { struct kmem_cache *root; struct mem_cgroup *memcg; int id; + if (is_root_cache(s)) + return; + + root = s->memcg_params->root_cache; + memcg = s->memcg_params->memcg; + id = memcg_cache_id(memcg); + + css_get(&memcg->css); + + mutex_lock(&memcg->slab_caches_mutex); + list_add(&s->memcg_params->list, &memcg->memcg_slab_caches); + mutex_unlock(&memcg->slab_caches_mutex); + + root->memcg_params->memcg_caches[id] = s; /* - * This happens, for instance, when a root cache goes away before we - * add any memcg. + * the readers won't lock, make sure everybody sees the updated value, + * so they won't put stuff in the queue again for no reason */ - if (!s->memcg_params) - return; + wmb(); +} - if (s->memcg_params->is_root_cache) - goto out; +void memcg_unregister_cache(struct kmem_cache *s) +{ + struct kmem_cache *root; + struct mem_cgroup *memcg; + int id; + + if (is_root_cache(s)) + return; memcg = s->memcg_params->memcg; id = memcg_cache_id(memcg); @@ -3292,8 +3302,6 @@ void memcg_release_cache(struct kmem_cache *s) mutex_unlock(&memcg->slab_caches_mutex); css_put(&memcg->css); -out: - memcg_free_cache_params(s); } /* @@ -3451,26 +3459,13 @@ static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg, mutex_lock(&memcg_cache_mutex); new_cachep = cache_from_memcg_idx(cachep, idx); - if (new_cachep) { - css_put(&memcg->css); + if (new_cachep) goto out; - } new_cachep = kmem_cache_dup(memcg, cachep); - if (new_cachep == NULL) { + if (new_cachep == NULL) new_cachep = cachep; - css_put(&memcg->css); - 
goto out; - } - - atomic_set(&new_cachep->memcg_params->nr_pages , 0); - cachep->memcg_params->memcg_caches[idx] = new_cachep; - /* - * the readers won't lock, make sure everybody sees the updated value, - * so they won't put stuff in the queue again for no reason - */ - wmb(); out: mutex_unlock(&memcg_cache_mutex); return new_cachep; @@ -3550,6 +3545,7 @@ static void memcg_create_cache_work_func(struct work_struct *w) cw = container_of(w, struct create_work, work); memcg_create_kmem_cache(cw->memcg, cw->cachep); + css_put(&cw->memcg->css); kfree(cw); } diff --git a/mm/slab_common.c b/mm/slab_common.c index 70f9e249ac30..db24ec48b946 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -215,7 +215,7 @@ kmem_cache_create_memcg(struct mem_cgroup *memcg, const char *name, size_t size, s->refcount = 1; list_add(&s->list, &slab_caches); - memcg_cache_list_add(memcg, s); + memcg_register_cache(s); out_unlock: mutex_unlock(&slab_mutex); @@ -265,7 +265,8 @@ void kmem_cache_destroy(struct kmem_cache *s) if (s->flags & SLAB_DESTROY_BY_RCU) rcu_barrier(); - memcg_release_cache(s); + memcg_unregister_cache(s); + memcg_free_cache_params(s); kfree(s->name); kmem_cache_free(kmem_cache, s); } else { -- cgit v1.2.3 From f8570263ee16eb1d5038b8e20d7db3a68bbb2b49 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 23 Jan 2014 15:53:06 -0800 Subject: memcg, slab: RCU protect memcg_params for root caches We relocate root cache's memcg_params whenever we need to grow the memcg_caches array to accommodate all kmem-active memory cgroups. Currently on relocation we free the old version immediately, which can lead to use-after-free, because the memcg_caches array is accessed lock-free (see cache_from_memcg_idx()). This patch fixes this by making memcg_params RCU-protected for root caches. 
Signed-off-by: Vladimir Davydov Cc: Michal Hocko Cc: Glauber Costa Cc: Johannes Weiner Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki Cc: Pekka Enberg Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 9 +++++++-- mm/memcontrol.c | 15 ++++++++------- mm/slab.h | 16 +++++++++++++++- 3 files changed, 30 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 1e2f4fe12773..a060142aa5f5 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -513,7 +513,9 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) * * Both the root cache and the child caches will have it. For the root cache, * this will hold a dynamically allocated array large enough to hold - * information about the currently limited memcgs in the system. + * information about the currently limited memcgs in the system. To allow the + * array to be accessed without taking any locks, on relocation we free the old + * version only after a grace period. * * Child caches will hold extra metadata needed for its operation. 
Fields are: * @@ -528,7 +530,10 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) struct memcg_cache_params { bool is_root_cache; union { - struct kmem_cache *memcg_caches[0]; + struct { + struct rcu_head rcu_head; + struct kmem_cache *memcg_caches[0]; + }; struct { struct mem_cgroup *memcg; struct list_head list; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 80197e544764..216659d4441a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3178,18 +3178,17 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups) if (num_groups > memcg_limited_groups_array_size) { int i; + struct memcg_cache_params *new_params; ssize_t size = memcg_caches_array_size(num_groups); size *= sizeof(void *); size += offsetof(struct memcg_cache_params, memcg_caches); - s->memcg_params = kzalloc(size, GFP_KERNEL); - if (!s->memcg_params) { - s->memcg_params = cur_params; + new_params = kzalloc(size, GFP_KERNEL); + if (!new_params) return -ENOMEM; - } - s->memcg_params->is_root_cache = true; + new_params->is_root_cache = true; /* * There is the chance it will be bigger than @@ -3203,7 +3202,7 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups) for (i = 0; i < memcg_limited_groups_array_size; i++) { if (!cur_params->memcg_caches[i]) continue; - s->memcg_params->memcg_caches[i] = + new_params->memcg_caches[i] = cur_params->memcg_caches[i]; } @@ -3216,7 +3215,9 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups) * bigger than the others. And all updates will reset this * anyway. */ - kfree(cur_params); + rcu_assign_pointer(s->memcg_params, new_params); + if (cur_params) + kfree_rcu(cur_params, rcu_head); } return 0; } diff --git a/mm/slab.h b/mm/slab.h index 72d1f9df71bd..8184a7cde272 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -160,14 +160,28 @@ static inline const char *cache_name(struct kmem_cache *s) return s->name; } +/* + * Note, we protect with RCU only the memcg_caches array, not per-memcg caches. 
+ * That said the caller must assure the memcg's cache won't go away. Since once + * created a memcg's cache is destroyed only along with the root cache, it is + * true if we are going to allocate from the cache or hold a reference to the + * root cache by other means. Otherwise, we should hold either the slab_mutex + * or the memcg's slab_caches_mutex while calling this function and accessing + * the returned value. + */ static inline struct kmem_cache * cache_from_memcg_idx(struct kmem_cache *s, int idx) { struct kmem_cache *cachep; + struct memcg_cache_params *params; if (!s->memcg_params) return NULL; - cachep = s->memcg_params->memcg_caches[idx]; + + rcu_read_lock(); + params = rcu_dereference(s->memcg_params); + cachep = params->memcg_caches[idx]; + rcu_read_unlock(); /* * Make sure we will access the up-to-date value. The code updating -- cgit v1.2.3 From 5e270e254885893f8c82ab9b91caa648af3690df Mon Sep 17 00:00:00 2001 From: Philipp Hachtmann Date: Thu, 23 Jan 2014 15:53:11 -0800 Subject: mm: free memblock.memory in free_all_bootmem When calling free_all_bootmem() the free areas under memblock's control are released to the buddy allocator. Additionally the reserved list is freed if it was reallocated by memblock. The same should apply for the memory list. 
Signed-off-by: Philipp Hachtmann Reviewed-by: Tejun Heo Cc: Joonsoo Kim Cc: Johannes Weiner Cc: Tang Chen Cc: Toshi Kani Cc: Jianguo Wu Cc: Yinghai Lu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 1 + mm/memblock.c | 16 ++++++++++++++++ mm/nobootmem.c | 10 +++++++++- 3 files changed, 26 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index cd0274bebd4c..1ef66360f0b0 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -61,6 +61,7 @@ phys_addr_t memblock_find_in_range_node(phys_addr_t size, phys_addr_t align, phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end, phys_addr_t size, phys_addr_t align); phys_addr_t get_allocated_memblock_reserved_regions_info(phys_addr_t *addr); +phys_addr_t get_allocated_memblock_memory_regions_info(phys_addr_t *addr); void memblock_allow_resize(void); int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid); int memblock_add(phys_addr_t base, phys_addr_t size); diff --git a/mm/memblock.c b/mm/memblock.c index 1c2ef2c7edab..64ed2439cf75 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -291,6 +291,22 @@ phys_addr_t __init_memblock get_allocated_memblock_reserved_regions_info( memblock.reserved.max); } +#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK + +phys_addr_t __init_memblock get_allocated_memblock_memory_regions_info( + phys_addr_t *addr) +{ + if (memblock.memory.regions == memblock_memory_init_regions) + return 0; + + *addr = __pa(memblock.memory.regions); + + return PAGE_ALIGN(sizeof(struct memblock_region) * + memblock.memory.max); +} + +#endif + /** * memblock_double_array - double the size of the memblock regions array * @type: memblock type of the regions array being doubled diff --git a/mm/nobootmem.c b/mm/nobootmem.c index bb1a70cc97a7..17c89023184f 100644 --- a/mm/nobootmem.c +++ b/mm/nobootmem.c @@ -122,11 +122,19 @@ static unsigned long __init 
free_low_memory_core_early(void) for_each_free_mem_range(i, NUMA_NO_NODE, &start, &end, NULL) count += __free_memory_core(start, end); - /* free range that is used for reserved array if we allocate it */ + /* Free memblock.reserved array if it was allocated */ size = get_allocated_memblock_reserved_regions_info(&start); if (size) count += __free_memory_core(start, start + size); +#ifdef CONFIG_ARCH_DISCARD_MEMBLOCK + + /* Free memblock.memory array if it was allocated */ + size = get_allocated_memblock_memory_regions_info(&start); + if (size) + count += __free_memory_core(start, start + size); +#endif + return count; } -- cgit v1.2.3 From 54a43d54988a3731d644fdeb7a1d6f46b4ac64c7 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 23 Jan 2014 15:53:13 -0800 Subject: numa: add a sysctl for numa_balancing Add a working sysctl to enable/disable automatic numa memory balancing at runtime. This allows us to track down performance problems with this feature and is generally a good idea. This was possible earlier through debugfs, but only with special debugging options set. Also fix the boot message. 
[akpm@linux-foundation.org: s/sched_numa_balancing/sysctl_numa_balancing/] Signed-off-by: Andi Kleen Acked-by: Mel Gorman Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched/sysctl.h | 4 ++++ kernel/sched/core.c | 24 +++++++++++++++++++++++- kernel/sysctl.c | 9 +++++++++ mm/mempolicy.c | 2 +- 4 files changed, 37 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 31e0193cb0c5..b13cf430764f 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -99,4 +99,8 @@ extern int sched_rt_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +extern int sysctl_numa_balancing(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); + #endif /* _SCHED_SYSCTL_H */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 4d6964e49711..7fea865a810d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1770,7 +1770,29 @@ void set_numabalancing_state(bool enabled) numabalancing_enabled = enabled; } #endif /* CONFIG_SCHED_DEBUG */ -#endif /* CONFIG_NUMA_BALANCING */ + +#ifdef CONFIG_PROC_SYSCTL +int sysctl_numa_balancing(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table t; + int err; + int state = numabalancing_enabled; + + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + t = *table; + t.data = &state; + err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); + if (err < 0) + return err; + if (write) + set_numabalancing_state(state); + return err; +} +#endif +#endif /* * fork()/clone()-time setup: diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 332cefcdb04b..693eac39c202 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -389,6 +389,15 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "numa_balancing", + 
.data = NULL, /* filled in by handler */ + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = sysctl_numa_balancing, + .extra1 = &zero, + .extra2 = &one, + }, #endif /* CONFIG_NUMA_BALANCING */ #endif /* CONFIG_SCHED_DEBUG */ { diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 0cd2c4d4e270..947293e76533 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2668,7 +2668,7 @@ static void __init check_numabalancing_enable(void) if (nr_node_ids > 1 && !numabalancing_override) { printk(KERN_INFO "Enabling automatic NUMA balancing. " - "Configure with numa_balancing= or sysctl"); + "Configure with numa_balancing= or the kernel.numa_balancing sysctl"); set_numabalancing_state(numabalancing_default); } } -- cgit v1.2.3 From b30afea019a4548ee77b73e83f03104e0e3a0556 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Thu, 23 Jan 2014 15:53:18 -0800 Subject: include/linux/genalloc.h: spinlock_t needs spinlock_types.h Compiling a C file which includes genalloc.h but without spinlock_types.h being included before, we will see the compile error below. include/linux/genalloc.h:54:2: error: unknown type name `spinlock_t' Include spinlock_types.h from genalloc.h to fix the problem. Signed-off-by: Shawn Guo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/genalloc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index 1eda33d7cb10..1c2fdaa2ffc3 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h @@ -30,6 +30,8 @@ #ifndef __GENALLOC_H__ #define __GENALLOC_H__ +#include + struct device; struct device_node; -- cgit v1.2.3 From 77719536dc00f8fd8f5abe6dadbde5331c37f996 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 23 Jan 2014 15:53:59 -0800 Subject: conditionally define U32_MAX The symbol U32_MAX is defined in several spots. Change these definitions to be conditional. 
This is in preparation for the next patch, which centralizes the definition in <linux/kernel.h>. Signed-off-by: Alex Elder Cc: Sage Weil Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/reiserfs.h | 2 ++ include/linux/ceph/decode.h | 2 ++ net/ipv4/tcp_illinois.c | 2 ++ 3 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index f8adaee537c2..66a2e832fa8d 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h @@ -1958,7 +1958,9 @@ struct treepath var = {.path_length = ILLEGAL_PATH_ELEMENT_OFFSET, .reada = 0,} #define MAX_US_INT 0xffff // reiserfs version 2 has max offset 60 bits. Version 1 - 32 bit offset +#ifndef U32_MAX #define U32_MAX (~(__u32)0) +#endif /* !U32_MAX */ static inline loff_t max_reiserfs_offset(struct inode *inode) { diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h index 0442c3d800f0..27fe66a279b1 100644 --- a/include/linux/ceph/decode.h +++ b/include/linux/ceph/decode.h @@ -10,6 +10,7 @@ /* This seemed to be the easiest place to define these */ +#ifndef U32_MAX #define U8_MAX ((u8)(~0U)) #define U16_MAX ((u16)(~0U)) #define U32_MAX ((u32)(~0U)) @@ -24,6 +25,7 @@ #define S16_MIN ((s16)(-S16_MAX - 1)) #define S32_MIN ((s32)(-S32_MAX - 1)) #define S64_MIN ((s64)(-S64_MAX - 1LL)) +#endif /* !U32_MAX */ /* * in all cases, diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index 8a520996f3d2..f43947235b35 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -23,7 +23,9 @@ #define ALPHA_MIN ((3*ALPHA_SCALE)/10) /* ~0.3 */ #define ALPHA_MAX (10*ALPHA_SCALE) /* 10.0 */ #define ALPHA_BASE ALPHA_SCALE /* 1.0 */ +#ifndef U32_MAX #define U32_MAX ((u32)~0U) +#endif /* !U32_MAX */ #define RTT_MAX (U32_MAX / ALPHA_MAX) /* 3.3 secs */ #define BETA_SHIFT 6 -- cgit v1.2.3 From 89a0714106aac7309c7dfa0f004b39e1e89d2942 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 23 Jan 2014 15:54:00 -0800 Subject: 
kernel.h: define u8, s8, u32, etc. limits Create constants that define the maximum and minimum values representable by the kernel types u8, s8, u16, s16, and so on. Signed-off-by: Alex Elder Cc: Sage Weil Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2aa3d4b000e6..f74bb581ab64 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -29,6 +29,19 @@ #define ULLONG_MAX (~0ULL) #define SIZE_MAX (~(size_t)0) +#define U8_MAX ((u8)~0U) +#define S8_MAX ((s8)(U8_MAX>>1)) +#define S8_MIN ((s8)(-S8_MAX - 1)) +#define U16_MAX ((u16)~0U) +#define S16_MAX ((s16)(U16_MAX>>1)) +#define S16_MIN ((s16)(-S16_MAX - 1)) +#define U32_MAX ((u32)~0U) +#define S32_MAX ((s32)(U32_MAX>>1)) +#define S32_MIN ((s32)(-S32_MAX - 1)) +#define U64_MAX ((u64)~0ULL) +#define S64_MAX ((s64)(U64_MAX>>1)) +#define S64_MIN ((s64)(-S64_MAX - 1)) + #define STACK_MAGIC 0xdeadbeef #define REPEAT_BYTE(x) ((~0ul / 0xff) * (x)) -- cgit v1.2.3 From 04f9b74e4d96d349de12fdd4e6626af4a9f75e09 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 23 Jan 2014 15:54:01 -0800 Subject: remove extra definitions of U32_MAX Now that the definition is centralized in <linux/kernel.h>, the definitions of U32_MAX (and related) elsewhere in the kernel can be removed. Signed-off-by: Alex Elder Acked-by: Sage Weil Acked-by: David S. 
Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/reiserfs.h | 4 ---- include/linux/ceph/decode.h | 19 ------------------- net/ipv4/tcp_illinois.c | 3 --- 3 files changed, 26 deletions(-) (limited to 'include/linux') diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index 66a2e832fa8d..dfb617b2bad2 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h @@ -1958,10 +1958,6 @@ struct treepath var = {.path_length = ILLEGAL_PATH_ELEMENT_OFFSET, .reada = 0,} #define MAX_US_INT 0xffff // reiserfs version 2 has max offset 60 bits. Version 1 - 32 bit offset -#ifndef U32_MAX -#define U32_MAX (~(__u32)0) -#endif /* !U32_MAX */ - static inline loff_t max_reiserfs_offset(struct inode *inode) { if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5) diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h index 27fe66a279b1..a6ef9cc267ec 100644 --- a/include/linux/ceph/decode.h +++ b/include/linux/ceph/decode.h @@ -8,25 +8,6 @@ #include -/* This seemed to be the easiest place to define these */ - -#ifndef U32_MAX -#define U8_MAX ((u8)(~0U)) -#define U16_MAX ((u16)(~0U)) -#define U32_MAX ((u32)(~0U)) -#define U64_MAX ((u64)(~0ULL)) - -#define S8_MAX ((s8)(U8_MAX >> 1)) -#define S16_MAX ((s16)(U16_MAX >> 1)) -#define S32_MAX ((s32)(U32_MAX >> 1)) -#define S64_MAX ((s64)(U64_MAX >> 1LL)) - -#define S8_MIN ((s8)(-S8_MAX - 1)) -#define S16_MIN ((s16)(-S16_MAX - 1)) -#define S32_MIN ((s32)(-S32_MAX - 1)) -#define S64_MIN ((s64)(-S64_MAX - 1LL)) -#endif /* !U32_MAX */ - /* * in all cases, * void **p pointer to position pointer diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index f43947235b35..e498a62b8f97 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -23,9 +23,6 @@ #define ALPHA_MIN ((3*ALPHA_SCALE)/10) /* ~0.3 */ #define ALPHA_MAX (10*ALPHA_SCALE) /* 10.0 */ #define ALPHA_BASE ALPHA_SCALE /* 1.0 */ -#ifndef U32_MAX -#define U32_MAX ((u32)~0U) -#endif /* !U32_MAX */ #define 
RTT_MAX (U32_MAX / ALPHA_MAX) /* 3.3 secs */ #define BETA_SHIFT 6 -- cgit v1.2.3 From 00b2c76a6abbe082bb5afb89ee49ec325e9cd4d2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 23 Jan 2014 15:54:02 -0800 Subject: include/linux/of.h: make for_each_child_of_node() reference its args when CONFIG_OF=n Make for_each_child_of_node() reference its args when CONFIG_OF=n to avoid warnings like: drivers/leds/leds-pwm.c:88:22: warning: unused variable 'node' [-Wunused-variable] struct device_node *node = pdev->dev.of_node; ^ Signed-off-by: David Howells Cc: Grant Likely Cc: Rob Herring Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/of.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 276c546980d8..70c64ba17fa5 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -377,8 +377,13 @@ static inline bool of_have_populated_dt(void) return false; } +/* Kill an unused variable warning on a device_node pointer */ +static inline void __of_use_dn(const struct device_node *np) +{ +} + #define for_each_child_of_node(parent, child) \ - while (0) + while (__of_use_dn(parent), __of_use_dn(child), 0) #define for_each_available_child_of_node(parent, child) \ while (0) -- cgit v1.2.3 From aace05097a0fd467230e39acb148be0fdaa90068 Mon Sep 17 00:00:00 2001 From: "Du, Changbin" Date: Thu, 23 Jan 2014 15:54:12 -0800 Subject: lib/parser.c: add match_wildcard() function The match_wildcard() function is a simple implementation of a wildcard matching algorithm. It supports only the two usual wildcards: '*' - matches zero or more characters '?' - matches one character This algorithm is safe since it is non-recursive. 
Signed-off-by: Du, Changbin Cc: Jason Baron Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/parser.h | 1 + lib/parser.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) (limited to 'include/linux') diff --git a/include/linux/parser.h b/include/linux/parser.h index ea2281e726f6..39d5b7955b23 100644 --- a/include/linux/parser.h +++ b/include/linux/parser.h @@ -29,5 +29,6 @@ int match_token(char *, const match_table_t table, substring_t args[]); int match_int(substring_t *, int *result); int match_octal(substring_t *, int *result); int match_hex(substring_t *, int *result); +bool match_wildcard(const char *pattern, const char *str); size_t match_strlcpy(char *, const substring_t *, size_t); char *match_strdup(const substring_t *); diff --git a/lib/parser.c b/lib/parser.c index 807b2aaa33fa..ee5295541cea 100644 --- a/lib/parser.c +++ b/lib/parser.c @@ -192,6 +192,56 @@ int match_hex(substring_t *s, int *result) return match_number(s, result, 16); } +/** + * match_wildcard: - parse if a string matches given wildcard pattern + * @pattern: wildcard pattern + * @str: the string to be parsed + * + * Description: Parse the string @str to check if matches wildcard + * pattern @pattern. The pattern may contain two type wildcardes: + * '*' - matches zero or more characters + * '?' - matches one character + * If it's matched, return true, else return false. 
+ */ +bool match_wildcard(const char *pattern, const char *str) +{ + const char *s = str; + const char *p = pattern; + bool star = false; + + while (*s) { + switch (*p) { + case '?': + s++; + p++; + break; + case '*': + star = true; + str = s; + if (!*++p) + return true; + pattern = p; + break; + default: + if (*s == *p) { + s++; + p++; + } else { + if (!star) + return false; + str++; + s = str; + p = pattern; + } + break; + } + } + + if (*p == '*') + ++p; + return !*p; +} + /** * match_strlcpy: - Copy the characters from a substring_t to a sized buffer * @dest: where to copy to @@ -235,5 +285,6 @@ EXPORT_SYMBOL(match_token); EXPORT_SYMBOL(match_int); EXPORT_SYMBOL(match_octal); EXPORT_SYMBOL(match_hex); +EXPORT_SYMBOL(match_wildcard); EXPORT_SYMBOL(match_strlcpy); EXPORT_SYMBOL(match_strdup); -- cgit v1.2.3 From c28aa1f0a847c36daa4280b611e2b54bad75c576 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 23 Jan 2014 15:54:16 -0800 Subject: printk/cache: mark printk_once test variable __read_mostly Add #include <linux/cache.h> to define __read_mostly. Convert cache.h to use uapi/linux/kernel.h instead of linux/kernel.h to avoid recursive #includes. Convert the ALIGN macro to __ALIGN_KERNEL. printk_once only sets the bool variable tested once so mark it __read_mostly. Neaten the alignment so it matches the rest of the pr_<level>_once #defines too. 
Signed-off-by: Joe Perches Reviewed-by: James Hogan Cc: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/include/asm/processor.h | 1 + include/linux/cache.h | 4 ++-- include/linux/printk.h | 19 ++++++++++--------- 3 files changed, 13 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h index 5a84b3a50741..efd1b927ccb7 100644 --- a/arch/ia64/include/asm/processor.h +++ b/arch/ia64/include/asm/processor.h @@ -71,6 +71,7 @@ #include #include #include +#include #include #include diff --git a/include/linux/cache.h b/include/linux/cache.h index 4c570653ab84..17e7e82d2aa7 100644 --- a/include/linux/cache.h +++ b/include/linux/cache.h @@ -1,11 +1,11 @@ #ifndef __LINUX_CACHE_H #define __LINUX_CACHE_H -#include +#include #include #ifndef L1_CACHE_ALIGN -#define L1_CACHE_ALIGN(x) ALIGN(x, L1_CACHE_BYTES) +#define L1_CACHE_ALIGN(x) __ALIGN_KERNEL(x, L1_CACHE_BYTES) #endif #ifndef SMP_CACHE_BYTES diff --git a/include/linux/printk.h b/include/linux/printk.h index 694925837a16..cc6f74d65167 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -5,6 +5,7 @@ #include #include #include +#include extern const char linux_banner[]; extern const char linux_proc_banner[]; @@ -253,17 +254,17 @@ extern asmlinkage void dump_stack(void) __cold; */ #ifdef CONFIG_PRINTK -#define printk_once(fmt, ...) \ -({ \ - static bool __print_once; \ - \ - if (!__print_once) { \ - __print_once = true; \ - printk(fmt, ##__VA_ARGS__); \ - } \ +#define printk_once(fmt, ...) \ +({ \ + static bool __print_once __read_mostly; \ + \ + if (!__print_once) { \ + __print_once = true; \ + printk(fmt, ##__VA_ARGS__); \ + } \ }) #else -#define printk_once(fmt, ...) \ +#define printk_once(fmt, ...) 
\ no_printk(fmt, ##__VA_ARGS__) #endif -- cgit v1.2.3 From 0fa9aa20c33d76e98f44ff1de6e128e39a7738ca Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Thu, 23 Jan 2014 15:54:54 -0800 Subject: fs/ramfs/file-nommu.c: make ramfs_nommu_get_unmapped_area() and ramfs_nommu_mmap() static Since commit 853ac43ab194 ("shmem: unify regular and tiny shmem"), ramfs_nommu_get_unmapped_area() and ramfs_nommu_mmap() are not directly referenced outside of file-nommu.c. Thus make them static. Signed-off-by: Axel Lin Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ramfs/file-nommu.c | 10 ++++++++-- include/linux/ramfs.h | 7 ------- 2 files changed, 8 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 8d5b438cc188..80862b1aeea7 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c @@ -27,6 +27,12 @@ #include "internal.h" static int ramfs_nommu_setattr(struct dentry *, struct iattr *); +static unsigned long ramfs_nommu_get_unmapped_area(struct file *file, + unsigned long addr, + unsigned long len, + unsigned long pgoff, + unsigned long flags); +static int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma); const struct address_space_operations ramfs_aops = { .readpage = simple_readpage, @@ -197,7 +203,7 @@ static int ramfs_nommu_setattr(struct dentry *dentry, struct iattr *ia) * - the pages to be mapped must exist * - the pages be physically contiguous in sequence */ -unsigned long ramfs_nommu_get_unmapped_area(struct file *file, +static unsigned long ramfs_nommu_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { @@ -256,7 +262,7 @@ out: /* * set up a mapping for shared memory segments */ -int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma) +static int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma) { if (!(vma->vm_flags & VM_SHARED)) return -ENOSYS; diff --git 
a/include/linux/ramfs.h b/include/linux/ramfs.h index 753207c8ce20..ecc730977a5a 100644 --- a/include/linux/ramfs.h +++ b/include/linux/ramfs.h @@ -14,13 +14,6 @@ ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) } #else extern int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize); -extern unsigned long ramfs_nommu_get_unmapped_area(struct file *file, - unsigned long addr, - unsigned long len, - unsigned long pgoff, - unsigned long flags); - -extern int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma); #endif extern const struct file_operations ramfs_file_operations; -- cgit v1.2.3 From 7288e1187ba935996232246916418c64bb88da30 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 23 Jan 2014 15:55:32 -0800 Subject: coredump: kill MMF_DUMPABLE and MMF_DUMP_SECURELY Nobody actually needs MMF_DUMPABLE/MMF_DUMP_SECURELY, they are only used to enforce the encoding of SUID_DUMP_* enum in mm->flags & MMF_DUMPABLE_MASK. Now that set_dumpable() updates both bits atomically we can kill them and simply store the value "as is" in 2 lower bits. Signed-off-by: Oleg Nesterov Acked-by: Kees Cook Cc: Alex Kelly Cc: "Eric W. Biederman" Cc: Josh Triplett Cc: Petr Matousek Cc: Vasily Kulikov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 21 ++++++--------------- include/linux/sched.h | 4 +--- 2 files changed, 7 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index f039386499db..f798da06abac 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1613,33 +1613,24 @@ void set_binfmt(struct linux_binfmt *new) EXPORT_SYMBOL(set_binfmt); /* - * set_dumpable converts traditional three-value dumpable to two flags and - * stores them into mm->flags. + * set_dumpable stores three-value SUID_DUMP_* into mm->flags. 
*/ void set_dumpable(struct mm_struct *mm, int value) { unsigned long old, new; + if (WARN_ON((unsigned)value > SUID_DUMP_ROOT)) + return; + do { old = ACCESS_ONCE(mm->flags); - new = old & ~MMF_DUMPABLE_MASK; - - switch (value) { - case SUID_DUMP_ROOT: - new |= (1 << MMF_DUMP_SECURELY); - case SUID_DUMP_USER: - new |= (1<< MMF_DUMPABLE); - } - + new = (old & ~MMF_DUMPABLE_MASK) | value; } while (cmpxchg(&mm->flags, old, new) != old); } int __get_dumpable(unsigned long mm_flags) { - int ret; - - ret = mm_flags & MMF_DUMPABLE_MASK; - return (ret > SUID_DUMP_USER) ? SUID_DUMP_ROOT : ret; + return mm_flags & MMF_DUMPABLE_MASK; } /* diff --git a/include/linux/sched.h b/include/linux/sched.h index 485234d2fd42..124430ba569b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -400,10 +400,8 @@ extern int get_dumpable(struct mm_struct *mm); #define SUID_DUMP_ROOT 2 /* Dump as root */ /* mm flags */ -/* dumpable bits */ -#define MMF_DUMPABLE 0 /* core dump is permitted */ -#define MMF_DUMP_SECURELY 1 /* core file is readable only by root */ +/* for SUID_DUMP_* above */ #define MMF_DUMPABLE_BITS 2 #define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1) -- cgit v1.2.3 From 942be3875a1931c379bbc37053829dd6847e0f3f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 23 Jan 2014 15:55:34 -0800 Subject: coredump: make __get_dumpable/get_dumpable inline, kill fs/coredump.h 1. Remove fs/coredump.h. It is not clear why do we need it, it only declares __get_dumpable(), signal.c includes it for no reason. 2. Now that get_dumpable() and __get_dumpable() are really trivial make them inline in linux/sched.h. Signed-off-by: Oleg Nesterov Acked-by: Kees Cook Cc: Alex Kelly Cc: "Eric W. 
Biederman" Cc: Josh Triplett Cc: Petr Matousek Cc: Vasily Kulikov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/coredump.c | 1 - fs/coredump.h | 6 ------ fs/exec.c | 18 ------------------ include/linux/sched.h | 21 +++++++++++++++++---- 4 files changed, 17 insertions(+), 29 deletions(-) delete mode 100644 fs/coredump.h (limited to 'include/linux') diff --git a/fs/coredump.c b/fs/coredump.c index bc3fbcd32558..e3ad709a4232 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -40,7 +40,6 @@ #include #include "internal.h" -#include "coredump.h" #include diff --git a/fs/coredump.h b/fs/coredump.h deleted file mode 100644 index e39ff072110d..000000000000 --- a/fs/coredump.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _FS_COREDUMP_H -#define _FS_COREDUMP_H - -extern int __get_dumpable(unsigned long mm_flags); - -#endif diff --git a/fs/exec.c b/fs/exec.c index f798da06abac..9cbad5b0187e 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -62,7 +62,6 @@ #include #include "internal.h" -#include "coredump.h" #include @@ -1609,7 +1608,6 @@ void set_binfmt(struct linux_binfmt *new) if (new) __module_get(new->module); } - EXPORT_SYMBOL(set_binfmt); /* @@ -1628,22 +1626,6 @@ void set_dumpable(struct mm_struct *mm, int value) } while (cmpxchg(&mm->flags, old, new) != old); } -int __get_dumpable(unsigned long mm_flags) -{ - return mm_flags & MMF_DUMPABLE_MASK; -} - -/* - * This returns the actual value of the suid_dumpable flag. For things - * that are using this for checking for privilege transitions, it must - * test against SUID_DUMP_USER rather than treating it as a boolean - * value. 
- */ -int get_dumpable(struct mm_struct *mm) -{ - return __get_dumpable(mm->flags); -} - SYSCALL_DEFINE3(execve, const char __user *, filename, const char __user *const __user *, argv, diff --git a/include/linux/sched.h b/include/linux/sched.h index 124430ba569b..cf9e414dbb9e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -391,10 +391,6 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} #endif - -extern void set_dumpable(struct mm_struct *mm, int value); -extern int get_dumpable(struct mm_struct *mm); - #define SUID_DUMP_DISABLE 0 /* No setuid dumping */ #define SUID_DUMP_USER 1 /* Dump as user of process */ #define SUID_DUMP_ROOT 2 /* Dump as root */ @@ -405,6 +401,23 @@ extern int get_dumpable(struct mm_struct *mm); #define MMF_DUMPABLE_BITS 2 #define MMF_DUMPABLE_MASK ((1 << MMF_DUMPABLE_BITS) - 1) +extern void set_dumpable(struct mm_struct *mm, int value); +/* + * This returns the actual value of the suid_dumpable flag. For things + * that are using this for checking for privilege transitions, it must + * test against SUID_DUMP_USER rather than treating it as a boolean + * value. + */ +static inline int __get_dumpable(unsigned long mm_flags) +{ + return mm_flags & MMF_DUMPABLE_MASK; +} + +static inline int get_dumpable(struct mm_struct *mm) +{ + return __get_dumpable(mm->flags); +} + /* coredump filter bits */ #define MMF_DUMP_ANON_PRIVATE 2 #define MMF_DUMP_ANON_SHARED 3 -- cgit v1.2.3 From 74e37200de8e9c4e09b70c21c3f13c2071e77457 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 23 Jan 2014 15:55:35 -0800 Subject: proc: cleanup/simplify get_task_state/task_state_array get_task_state() and task_state_array[] look confusing and suboptimal, it is not clear what it can actually report to user-space and task_state_array[] blows .data for no reason. 1. state = (tsk->state & TASK_REPORT) | tsk->exit_state is not clear. 
TASK_REPORT is self-documenting but it is not clear what ->exit_state can add. Move the potential exit_state's (EXIT_ZOMBIE and EXIT_DEAD) into TASK_REPORT and use it to calculate the final result. 2. With the change above it is obvious that task_state_array[] has the unused entries just to make BUILD_BUG_ON() happy. Change this BUILD_BUG_ON() to use TASK_REPORT rather than TASK_STATE_MAX and shrink task_state_array[]. 3. Turn the "while (state)" loop into fls(state). Signed-off-by: Oleg Nesterov Cc: Peter Zijlstra Cc: David Laight Cc: Geert Uytterhoeven Cc: Ingo Molnar Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/array.c | 15 +++------------ include/linux/sched.h | 2 +- 2 files changed, 4 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/array.c b/fs/proc/array.c index 1bd2077187fd..554a0b229ac2 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -140,24 +140,15 @@ static const char * const task_state_array[] = { "t (tracing stop)", /* 8 */ "Z (zombie)", /* 16 */ "X (dead)", /* 32 */ - "x (dead)", /* 64 */ - "K (wakekill)", /* 128 */ - "W (waking)", /* 256 */ - "P (parked)", /* 512 */ }; static inline const char *get_task_state(struct task_struct *tsk) { - unsigned int state = (tsk->state & TASK_REPORT) | tsk->exit_state; - const char * const *p = &task_state_array[0]; + unsigned int state = (tsk->state | tsk->exit_state) & TASK_REPORT; - BUILD_BUG_ON(1 + ilog2(TASK_STATE_MAX) != ARRAY_SIZE(task_state_array)); + BUILD_BUG_ON(1 + ilog2(TASK_REPORT) != ARRAY_SIZE(task_state_array)-1); - while (state) { - p++; - state >>= 1; - } - return *p; + return task_state_array[fls(state)]; } static inline void task_state(struct seq_file *m, struct pid_namespace *ns, diff --git a/include/linux/sched.h b/include/linux/sched.h index cf9e414dbb9e..33e4e9e1f621 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -229,7 +229,7 @@ extern char ___assert_task_state[1 - 2*!!( /* get_task_state() */ 
#define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \ TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ - __TASK_TRACED) + __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD) #define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) #define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0) -- cgit v1.2.3 From ff252c1fc537b0c9e40f62da0a9d11bf0737b7db Mon Sep 17 00:00:00 2001 From: DaeSeok Youn Date: Thu, 23 Jan 2014 15:55:46 -0800 Subject: kernel/fork.c: make dup_mm() static dup_mm() is used only in kernel/fork.c Signed-off-by: Daeseok Youn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 -- kernel/fork.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 33e4e9e1f621..66a17ad55bcb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2295,8 +2295,6 @@ extern struct mm_struct *get_task_mm(struct task_struct *task); extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); /* Remove the current tasks stale references to the old mm_struct */ extern void mm_release(struct task_struct *, struct mm_struct *); -/* Allocate a new mm structure and copy contents from tsk->mm */ -extern struct mm_struct *dup_mm(struct task_struct *tsk); extern int copy_thread(unsigned long, unsigned long, unsigned long, struct task_struct *); diff --git a/kernel/fork.c b/kernel/fork.c index 2f11bbe376b0..5615ead014e3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -800,7 +800,7 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm) * Allocate a new mm structure and copy contents from the * mm structure of the passed in task structure. 
*/ -struct mm_struct *dup_mm(struct task_struct *tsk) +static struct mm_struct *dup_mm(struct task_struct *tsk) { struct mm_struct *mm, *oldmm = current->mm; int err; -- cgit v1.2.3 From 98611e4e6a2b4a03fd2d4750cce8e4455a995c8d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 23 Jan 2014 15:55:52 -0800 Subject: exec: kill task_struct->did_exec We can kill either task->did_exec or PF_FORKNOEXEC, they are mutually exclusive. The patch kills ->did_exec because it has a single user. Signed-off-by: Oleg Nesterov Acked-by: KOSAKI Motohiro Cc: Al Viro Cc: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 1 - include/linux/sched.h | 1 - kernel/fork.c | 1 - kernel/sys.c | 5 ++--- 4 files changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index f860866e04ba..493b102a27c1 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1424,7 +1424,6 @@ static int exec_binprm(struct linux_binprm *bprm) audit_bprm(bprm); trace_sched_process_exec(current, old_pid, bprm); ptrace_event(PTRACE_EVENT_EXEC, old_vpid); - current->did_exec = 1; proc_exec_connector(current); } diff --git a/include/linux/sched.h b/include/linux/sched.h index 66a17ad55bcb..68a0e84463a0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1239,7 +1239,6 @@ struct task_struct { /* Used for emulating ABI behavior of previous Linux versions */ unsigned int personality; - unsigned did_exec:1; unsigned in_execve:1; /* Tell the LSMs that the process is doing an * execve */ unsigned in_iowait:1; diff --git a/kernel/fork.c b/kernel/fork.c index b6dd0bbf4240..a17621c6cd42 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1226,7 +1226,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (!try_module_get(task_thread_info(p)->exec_domain->module)) goto bad_fork_cleanup_count; - p->did_exec = 0; delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ copy_flags(clone_flags, p); 
INIT_LIST_HEAD(&p->children); diff --git a/kernel/sys.c b/kernel/sys.c index c72311324ea7..ecd3ea12f72a 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -895,8 +895,7 @@ SYSCALL_DEFINE1(times, struct tms __user *, tbuf) * only important on a multi-user system anyway, to make sure one user * can't send a signal to a process owned by another. -TYT, 12/12/91 * - * Auch. Had to add the 'did_exec' flag to conform completely to POSIX. - * LBT 04.03.94 + * !PF_FORKNOEXEC check to conform completely to POSIX. */ SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid) { @@ -932,7 +931,7 @@ SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid) if (task_session(p) != task_session(group_leader)) goto out; err = -EACCES; - if (p->did_exec) + if (!(p->flags & PF_FORKNOEXEC)) goto out; } else { err = -ESRCH; -- cgit v1.2.3 From 7984754b99b6c89054edc405e9d9d35810a91d36 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 23 Jan 2014 15:55:59 -0800 Subject: kexec: add sysctl to disable kexec_load For general-purpose (i.e. distro) kernel builds it makes sense to build with CONFIG_KEXEC to allow end users to choose what kind of things they want to do with kexec. However, in the face of trying to lock down a system with such a kernel, there needs to be a way to disable kexec_load (much like module loading can be disabled). Without this, it is too easy for the root user to modify kernel memory even when CONFIG_STRICT_DEVMEM and modules_disabled are set. With this change, it is still possible to load an image for use later, then disable kexec_load so the image (or lack of image) can't be altered. The intention is for using this in environments where "perfect" enforcement is hard. Without a verified boot, along with verified modules, and along with verified kexec, this is trying to give a system a better chance to defend itself (or at least grow the window of discoverability) against attack in the face of a privilege escalation. 
In my mind, I consider several boot scenarios: 1) Verified boot of read-only verified root fs loading fd-based verification of kexec images. 2) Secure boot of writable root fs loading signed kexec images. 3) Regular boot loading kexec (e.g. kcrash) image early and locking it. 4) Regular boot with no control of kexec image at all. 1 and 2 don't exist yet, but will soon once the verified kexec series has landed. 4 is the state of things now. The gap between 2 and 4 is too large, so this change creates scenario 3, a middle-ground above 4 when 2 and 1 are not possible for a system. Signed-off-by: Kees Cook Acked-by: Rik van Riel Cc: Vivek Goyal Cc: Eric Biederman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/sysctl/kernel.txt | 15 ++++++++++++++- include/linux/kexec.h | 1 + kernel/kexec.c | 3 ++- kernel/sysctl.c | 13 +++++++++++++ 4 files changed, 30 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 6d486404200e..ee9a2f983b99 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -33,6 +33,7 @@ show up in /proc/sys/kernel: - domainname - hostname - hotplug +- kexec_load_disabled - kptr_restrict - kstack_depth_to_print [ X86 only ] - l2cr [ PPC only ] @@ -287,6 +288,18 @@ Default value is "/sbin/hotplug". ============================================================== +kexec_load_disabled: + +A toggle indicating if the kexec_load syscall has been disabled. This +value defaults to 0 (false: kexec_load enabled), but can be set to 1 +(true: kexec_load disabled). Once true, kexec can no longer be used, and +the toggle cannot be set back to false. This allows a kexec image to be +loaded before disabling the syscall, allowing a system to set up (and +later use) an image without it being altered. Generally used together +with the "modules_disabled" sysctl. 
+ +============================================================== + kptr_restrict: This toggle indicates whether restrictions are placed on @@ -331,7 +344,7 @@ A toggle value indicating if modules are allowed to be loaded in an otherwise modular kernel. This toggle defaults to off (0), but can be set true (1). Once true, modules can be neither loaded nor unloaded, and the toggle cannot be set back -to false. +to false. Generally used with the "kexec_load_disabled" toggle. ============================================================== diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 5fd33dc1fe3a..6d4066cdb5b5 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -170,6 +170,7 @@ unsigned long paddr_vmcoreinfo_note(void); extern struct kimage *kexec_image; extern struct kimage *kexec_crash_image; +extern int kexec_load_disabled; #ifndef kexec_flush_icache_page #define kexec_flush_icache_page(page) diff --git a/kernel/kexec.c b/kernel/kexec.c index 9c970167e402..ac738781d356 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -932,6 +932,7 @@ static int kimage_load_segment(struct kimage *image, */ struct kimage *kexec_image; struct kimage *kexec_crash_image; +int kexec_load_disabled; static DEFINE_MUTEX(kexec_mutex); @@ -942,7 +943,7 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, int result; /* We only trust the superuser with rebooting the system. 
*/ - if (!capable(CAP_SYS_BOOT)) + if (!capable(CAP_SYS_BOOT) || kexec_load_disabled) return -EPERM; /* diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 693eac39c202..096db7452cbd 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include @@ -614,6 +615,18 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif +#ifdef CONFIG_KEXEC + { + .procname = "kexec_load_disabled", + .data = &kexec_load_disabled, + .maxlen = sizeof(int), + .mode = 0644, + /* only handle a transition from default "0" to "1" */ + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, + .extra2 = &one, + }, +#endif #ifdef CONFIG_MODULES { .procname = "modprobe", -- cgit v1.2.3 From 3089a4c8d3abc7e2ab105d1d39d415110d1566d6 Mon Sep 17 00:00:00 2001 From: Evgeny Boger Date: Thu, 23 Jan 2014 15:56:18 -0800 Subject: drivers/w1/masters/w1-gpio.c: add strong pullup emulation Strong pullup is emulated by driving pin logic high after write command when using tri-state push-pull GPIO. 
Signed-off-by: Evgeny Boger Cc: Greg KH Acked-by: David Fries Acked-by: Evgeniy Polyakov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/w1/masters/w1-gpio.c | 22 ++++++++++++++++++++++ drivers/w1/w1_int.c | 12 ------------ include/linux/w1-gpio.h | 1 + 3 files changed, 23 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/w1/masters/w1-gpio.c b/drivers/w1/masters/w1-gpio.c index e36b18b2817b..9709b8b484ba 100644 --- a/drivers/w1/masters/w1-gpio.c +++ b/drivers/w1/masters/w1-gpio.c @@ -18,10 +18,31 @@ #include #include #include +#include #include "../w1.h" #include "../w1_int.h" +static u8 w1_gpio_set_pullup(void *data, int delay) +{ + struct w1_gpio_platform_data *pdata = data; + + if (delay) { + pdata->pullup_duration = delay; + } else { + if (pdata->pullup_duration) { + gpio_direction_output(pdata->pin, 1); + + msleep(pdata->pullup_duration); + + gpio_direction_input(pdata->pin); + } + pdata->pullup_duration = 0; + } + + return 0; +} + static void w1_gpio_write_bit_dir(void *data, u8 bit) { struct w1_gpio_platform_data *pdata = data; @@ -132,6 +153,7 @@ static int w1_gpio_probe(struct platform_device *pdev) } else { gpio_direction_input(pdata->pin); master->write_bit = w1_gpio_write_bit_dir; + master->set_pullup = w1_gpio_set_pullup; } err = w1_add_master_device(master); diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c index 5a98649f6abc..590bd8a7cd1b 100644 --- a/drivers/w1/w1_int.c +++ b/drivers/w1/w1_int.c @@ -117,18 +117,6 @@ int w1_add_master_device(struct w1_bus_master *master) printk(KERN_ERR "w1_add_master_device: invalid function set\n"); return(-EINVAL); } - /* While it would be electrically possible to make a device that - * generated a strong pullup in bit bang mode, only hardware that - * controls 1-wire time frames are even expected to support a strong - * pullup. 
w1_io.c would need to support calling set_pullup before - * the last write_bit operation of a w1_write_8 which it currently - * doesn't. - */ - if (!master->write_byte && !master->touch_bit && master->set_pullup) { - printk(KERN_ERR "w1_add_master_device: set_pullup requires " - "write_byte or touch_bit, disabling\n"); - master->set_pullup = NULL; - } /* Lock until the device is added (or not) to w1_masters. */ mutex_lock(&w1_mlock); diff --git a/include/linux/w1-gpio.h b/include/linux/w1-gpio.h index 065e3ae79ab0..d58594a32324 100644 --- a/include/linux/w1-gpio.h +++ b/include/linux/w1-gpio.h @@ -20,6 +20,7 @@ struct w1_gpio_platform_data { unsigned int is_open_drain:1; void (*enable_external_pullup)(int enable); unsigned int ext_pullup_enable_pin; + unsigned int pullup_duration; }; #endif /* _LINUX_W1_GPIO_H */ -- cgit v1.2.3