author     Linus Torvalds <torvalds@linux-foundation.org>   2015-02-12 05:23:28 +0300
committer  Linus Torvalds <torvalds@linux-foundation.org>   2015-02-12 05:23:28 +0300
commit     59d53737a8640482995fea13c6e2c0fd016115d6 (patch)
tree       3423eb92315865d76cb8d488513bfef6ab9251d0 /include
parent     d3f180ea1a44aecba1b0dab2a253428e77f906bf (diff)
parent     8138a67a5557ffea3a21dfd6f037842d4e748513 (diff)
download   linux-59d53737a8640482995fea13c6e2c0fd016115d6.tar.xz
Merge branch 'akpm' (patches from Andrew)
Merge second set of updates from Andrew Morton:
"More of MM"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (83 commits)
mm/nommu.c: fix arithmetic overflow in __vm_enough_memory()
mm/mmap.c: fix arithmetic overflow in __vm_enough_memory()
vmstat: Reduce time interval to stat update on idle cpu
mm/page_owner.c: remove unnecessary stack_trace field
Documentation/filesystems/proc.txt: describe /proc/<pid>/map_files
mm: incorporate read-only pages into transparent huge pages
vmstat: do not use deferrable delayed work for vmstat_update
mm: more aggressive page stealing for UNMOVABLE allocations
mm: always steal split buddies in fallback allocations
mm: when stealing freepages, also take pages created by splitting buddy page
mincore: apply page table walker on do_mincore()
mm: /proc/pid/clear_refs: avoid split_huge_page()
mm: pagewalk: fix misbehavior of walk_page_range for vma(VM_PFNMAP)
mempolicy: apply page table walker on queue_pages_range()
arch/powerpc/mm/subpage-prot.c: use walk->vma and walk_page_vma()
memcg: cleanup preparation for page table walk
numa_maps: remove numa_maps->vma
numa_maps: fix typo in gather_hugetbl_stats
pagemap: use walk->vma instead of calling find_vma()
clear_refs: remove clear_refs_private->vma and introduce clear_refs_test_walk()
...
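Several of the patches listed above (pagemap, clear_refs, mempolicy, numa_maps, mincore, subpage-prot) convert callers to the reworked page table walker visible in the include/linux/mm.h hunk below: callbacks now receive the current VMA in walk->vma instead of calling find_vma(), and single-VMA walks go through the new walk_page_vma(). The following is only an illustrative sketch of a caller under that API, not code from this merge; the example_* names are hypothetical and the caller is assumed to hold mmap_sem for read.

```c
#include <linux/mm.h>

/* Hypothetical per-walk accumulator passed via walk->private. */
struct example_stats {
	unsigned long present_locked;	/* present PTEs in VM_LOCKED VMAs */
};

static int example_pte_entry(pte_t *pte, unsigned long addr,
			     unsigned long next, struct mm_walk *walk)
{
	struct example_stats *stats = walk->private;

	/* The core walker now supplies the VMA being walked in walk->vma. */
	if ((walk->vma->vm_flags & VM_LOCKED) && pte_present(*pte))
		stats->present_locked++;
	return 0;
}

/* Count present PTEs of one mlocked VMA; mmap_sem held by the caller. */
static unsigned long example_count_locked(struct vm_area_struct *vma)
{
	struct example_stats stats = { 0 };
	struct mm_walk walk = {
		.pte_entry	= example_pte_entry,
		.mm		= vma->vm_mm,
		.private	= &stats,
	};

	walk_page_vma(vma, &walk);
	return stats.present_locked;
}
```

The optional ->test_walk callback added in the same hunk lets a walker accept, skip, or abort per VMA before any entries are visited; its exact return-value convention is spelled out in the mm.h comment below.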
Diffstat (limited to 'include')
-rw-r--r--  include/asm-generic/4level-fixup.h        1
-rw-r--r--  include/linux/compaction.h               86
-rw-r--r--  include/linux/gfp.h                      12
-rw-r--r--  include/linux/huge_mm.h                  12
-rw-r--r--  include/linux/hugetlb.h                   8
-rw-r--r--  include/linux/kvm_host.h                 11
-rw-r--r--  include/linux/memcontrol.h               50
-rw-r--r--  include/linux/mm.h                       69
-rw-r--r--  include/linux/mm_types.h                 11
-rw-r--r--  include/linux/mmzone.h                   15
-rw-r--r--  include/linux/oom.h                      18
-rw-r--r--  include/linux/page_counter.h              3
-rw-r--r--  include/linux/page_ext.h                  2
-rw-r--r--  include/linux/swap.h                     15
-rw-r--r--  include/linux/swapops.h                   4
-rw-r--r--  include/trace/events/compaction.h       209
-rw-r--r--  include/trace/events/kmem.h               7
-rw-r--r--  include/uapi/linux/kernel-page-flags.h    1
18 files changed, 364 insertions, 170 deletions
diff --git a/include/asm-generic/4level-fixup.h b/include/asm-generic/4level-fixup.h index 77ff547730af..5bdab6bffd23 100644 --- a/include/asm-generic/4level-fixup.h +++ b/include/asm-generic/4level-fixup.h @@ -4,6 +4,7 @@ #define __ARCH_HAS_4LEVEL_HACK #define __PAGETABLE_PUD_FOLDED +#define PUD_SHIFT PGDIR_SHIFT #define PUD_SIZE PGDIR_SIZE #define PUD_MASK PGDIR_MASK #define PTRS_PER_PUD 1 diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 3238ffa33f68..a014559e4a49 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -12,6 +12,10 @@ #define COMPACT_PARTIAL 3 /* The full zone was compacted */ #define COMPACT_COMPLETE 4 +/* For more detailed tracepoint output */ +#define COMPACT_NO_SUITABLE_PAGE 5 +#define COMPACT_NOT_SUITABLE_ZONE 6 +/* When adding new state, please change compaction_status_string, too */ /* Used to signal whether compaction detected need_sched() or lock contention */ /* No contention detected */ @@ -21,6 +25,8 @@ /* Zone lock or lru_lock was contended in async compaction */ #define COMPACT_CONTENDED_LOCK 2 +struct alloc_context; /* in mm/internal.h */ + #ifdef CONFIG_COMPACTION extern int sysctl_compact_memory; extern int sysctl_compaction_handler(struct ctl_table *table, int write, @@ -30,81 +36,25 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos); extern int fragmentation_index(struct zone *zone, unsigned int order); -extern unsigned long try_to_compact_pages(struct zonelist *zonelist, - int order, gfp_t gfp_mask, nodemask_t *mask, - enum migrate_mode mode, int *contended, - int alloc_flags, int classzone_idx); +extern unsigned long try_to_compact_pages(gfp_t gfp_mask, unsigned int order, + int alloc_flags, const struct alloc_context *ac, + enum migrate_mode mode, int *contended); extern void compact_pgdat(pg_data_t *pgdat, int order); extern void reset_isolation_suitable(pg_data_t *pgdat); extern unsigned long compaction_suitable(struct zone *zone, int order, int alloc_flags, int classzone_idx); -/* Do not skip compaction more than 64 times */ -#define COMPACT_MAX_DEFER_SHIFT 6 - -/* - * Compaction is deferred when compaction fails to result in a page - * allocation success. 1 << compact_defer_limit compactions are skipped up - * to a limit of 1 << COMPACT_MAX_DEFER_SHIFT - */ -static inline void defer_compaction(struct zone *zone, int order) -{ - zone->compact_considered = 0; - zone->compact_defer_shift++; - - if (order < zone->compact_order_failed) - zone->compact_order_failed = order; - - if (zone->compact_defer_shift > COMPACT_MAX_DEFER_SHIFT) - zone->compact_defer_shift = COMPACT_MAX_DEFER_SHIFT; -} - -/* Returns true if compaction should be skipped this time */ -static inline bool compaction_deferred(struct zone *zone, int order) -{ - unsigned long defer_limit = 1UL << zone->compact_defer_shift; - - if (order < zone->compact_order_failed) - return false; - - /* Avoid possible overflow */ - if (++zone->compact_considered > defer_limit) - zone->compact_considered = defer_limit; - - return zone->compact_considered < defer_limit; -} - -/* - * Update defer tracking counters after successful compaction of given order, - * which means an allocation either succeeded (alloc_success == true) or is - * expected to succeed. 
- */ -static inline void compaction_defer_reset(struct zone *zone, int order, - bool alloc_success) -{ - if (alloc_success) { - zone->compact_considered = 0; - zone->compact_defer_shift = 0; - } - if (order >= zone->compact_order_failed) - zone->compact_order_failed = order + 1; -} - -/* Returns true if restarting compaction after many failures */ -static inline bool compaction_restarting(struct zone *zone, int order) -{ - if (order < zone->compact_order_failed) - return false; - - return zone->compact_defer_shift == COMPACT_MAX_DEFER_SHIFT && - zone->compact_considered >= 1UL << zone->compact_defer_shift; -} +extern void defer_compaction(struct zone *zone, int order); +extern bool compaction_deferred(struct zone *zone, int order); +extern void compaction_defer_reset(struct zone *zone, int order, + bool alloc_success); +extern bool compaction_restarting(struct zone *zone, int order); #else -static inline unsigned long try_to_compact_pages(struct zonelist *zonelist, - int order, gfp_t gfp_mask, nodemask_t *nodemask, - enum migrate_mode mode, int *contended, - int alloc_flags, int classzone_idx) +static inline unsigned long try_to_compact_pages(gfp_t gfp_mask, + unsigned int order, int alloc_flags, + const struct alloc_context *ac, + enum migrate_mode mode, int *contended) { return COMPACT_CONTINUE; } diff --git a/include/linux/gfp.h b/include/linux/gfp.h index b840e3b2770d..51bd1e72a917 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -334,18 +334,22 @@ alloc_pages(gfp_t gfp_mask, unsigned int order) } extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, struct vm_area_struct *vma, unsigned long addr, - int node); + int node, bool hugepage); +#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ + alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true) #else #define alloc_pages(gfp_mask, order) \ alloc_pages_node(numa_node_id(), gfp_mask, order) -#define alloc_pages_vma(gfp_mask, order, vma, addr, node) \ +#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\ + alloc_pages(gfp_mask, order) +#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ alloc_pages(gfp_mask, order) #endif #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) #define alloc_page_vma(gfp_mask, vma, addr) \ - alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id()) + alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false) #define alloc_page_vma_node(gfp_mask, vma, addr, node) \ - alloc_pages_vma(gfp_mask, 0, vma, addr, node) + alloc_pages_vma(gfp_mask, 0, vma, addr, node, false) extern struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order); extern struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask, diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index ad9051bab267..f10b20f05159 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -157,6 +157,13 @@ static inline int hpage_nr_pages(struct page *page) extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, pmd_t pmd, pmd_t *pmdp); +extern struct page *huge_zero_page; + +static inline bool is_huge_zero_page(struct page *page) +{ + return ACCESS_ONCE(huge_zero_page) == page; +} + #else /* CONFIG_TRANSPARENT_HUGEPAGE */ #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; }) #define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; }) @@ -206,6 +213,11 @@ static inline int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_str return 0; } +static inline bool is_huge_zero_page(struct page *page) +{ + return false; +} + #endif /* 
CONFIG_TRANSPARENT_HUGEPAGE */ #endif /* _LINUX_HUGE_MM_H */ diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 7d7856359920..7b5785032049 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -99,9 +99,9 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep); struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, int write); struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address, - pmd_t *pmd, int write); + pmd_t *pmd, int flags); struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address, - pud_t *pud, int write); + pud_t *pud, int flags); int pmd_huge(pmd_t pmd); int pud_huge(pud_t pmd); unsigned long hugetlb_change_protection(struct vm_area_struct *vma, @@ -133,8 +133,8 @@ static inline void hugetlb_report_meminfo(struct seq_file *m) static inline void hugetlb_show_meminfo(void) { } -#define follow_huge_pmd(mm, addr, pmd, write) NULL -#define follow_huge_pud(mm, addr, pud, write) NULL +#define follow_huge_pmd(mm, addr, pmd, flags) NULL +#define follow_huge_pud(mm, addr, pud, flags) NULL #define prepare_hugepage_range(file, addr, len) (-EINVAL) #define pmd_huge(x) 0 #define pud_huge(x) 0 diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 26f106022c88..d189ee098aa2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -200,17 +200,6 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva, int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); #endif -/* - * Carry out a gup that requires IO. Allow the mm to relinquish the mmap - * semaphore if the filemap/swap has to wait on a page lock. pagep == NULL - * controls whether we retry the gup one more time to completion in that case. - * Typically this is called after a FAULT_FLAG_RETRY_NOWAIT in the main tdp - * handler. - */ -int kvm_get_user_page_io(struct task_struct *tsk, struct mm_struct *mm, - unsigned long addr, bool write_fault, - struct page **pagep); - enum { OUTSIDE_GUEST_MODE, IN_GUEST_MODE, diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index fb212e1d700d..6cfd934c7c9b 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -52,7 +52,27 @@ struct mem_cgroup_reclaim_cookie { unsigned int generation; }; +enum mem_cgroup_events_index { + MEM_CGROUP_EVENTS_PGPGIN, /* # of pages paged in */ + MEM_CGROUP_EVENTS_PGPGOUT, /* # of pages paged out */ + MEM_CGROUP_EVENTS_PGFAULT, /* # of page-faults */ + MEM_CGROUP_EVENTS_PGMAJFAULT, /* # of major page-faults */ + MEM_CGROUP_EVENTS_NSTATS, + /* default hierarchy events */ + MEMCG_LOW = MEM_CGROUP_EVENTS_NSTATS, + MEMCG_HIGH, + MEMCG_MAX, + MEMCG_OOM, + MEMCG_NR_EVENTS, +}; + #ifdef CONFIG_MEMCG +void mem_cgroup_events(struct mem_cgroup *memcg, + enum mem_cgroup_events_index idx, + unsigned int nr); + +bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg); + int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask, struct mem_cgroup **memcgp); void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg, @@ -102,6 +122,7 @@ void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *); * For memory reclaim. 
*/ int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec); +bool mem_cgroup_lruvec_online(struct lruvec *lruvec); int mem_cgroup_select_victim_node(struct mem_cgroup *memcg); unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list); void mem_cgroup_update_lru_size(struct lruvec *, enum lru_list, int); @@ -138,12 +159,10 @@ static inline bool mem_cgroup_disabled(void) return false; } -struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page, bool *locked, - unsigned long *flags); -void mem_cgroup_end_page_stat(struct mem_cgroup *memcg, bool *locked, - unsigned long *flags); +struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page); void mem_cgroup_update_page_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx, int val); +void mem_cgroup_end_page_stat(struct mem_cgroup *memcg); static inline void mem_cgroup_inc_page_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx) @@ -176,6 +195,18 @@ void mem_cgroup_split_huge_fixup(struct page *head); #else /* CONFIG_MEMCG */ struct mem_cgroup; +static inline void mem_cgroup_events(struct mem_cgroup *memcg, + enum mem_cgroup_events_index idx, + unsigned int nr) +{ +} + +static inline bool mem_cgroup_low(struct mem_cgroup *root, + struct mem_cgroup *memcg) +{ + return false; +} + static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask, struct mem_cgroup **memcgp) @@ -268,6 +299,11 @@ mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec) return 1; } +static inline bool mem_cgroup_lruvec_online(struct lruvec *lruvec) +{ + return true; +} + static inline unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru) { @@ -285,14 +321,12 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) { } -static inline struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page, - bool *locked, unsigned long *flags) +static inline struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page) { return NULL; } -static inline void mem_cgroup_end_page_stat(struct mem_cgroup *memcg, - bool *locked, unsigned long *flags) +static inline void mem_cgroup_end_page_stat(struct mem_cgroup *memcg) { } diff --git a/include/linux/mm.h b/include/linux/mm.h index 65db4aee738a..a4d24f3c5430 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -484,7 +484,8 @@ static inline void page_mapcount_reset(struct page *page) static inline int page_mapcount(struct page *page) { - return atomic_read(&(page)->_mapcount) + 1; + VM_BUG_ON_PAGE(PageSlab(page), page); + return atomic_read(&page->_mapcount) + 1; } static inline int page_count(struct page *page) @@ -627,29 +628,28 @@ int split_free_page(struct page *page); * prototype for that function and accessor functions. * These are _only_ valid on the head of a PG_compound page. 
*/ -typedef void compound_page_dtor(struct page *); static inline void set_compound_page_dtor(struct page *page, compound_page_dtor *dtor) { - page[1].lru.next = (void *)dtor; + page[1].compound_dtor = dtor; } static inline compound_page_dtor *get_compound_page_dtor(struct page *page) { - return (compound_page_dtor *)page[1].lru.next; + return page[1].compound_dtor; } static inline int compound_order(struct page *page) { if (!PageHead(page)) return 0; - return (unsigned long)page[1].lru.prev; + return page[1].compound_order; } static inline void set_compound_order(struct page *page, unsigned long order) { - page[1].lru.prev = (void *)order; + page[1].compound_order = order; } #ifdef CONFIG_MMU @@ -1164,8 +1164,6 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma, /** * mm_walk - callbacks for walk_page_range - * @pgd_entry: if set, called for each non-empty PGD (top-level) entry - * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry * this handler is required to be able to handle * pmd_trans_huge() pmds. They may simply choose to @@ -1173,16 +1171,18 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma, * @pte_entry: if set, called for each non-empty PTE (4th-level) entry * @pte_hole: if set, called for each hole at all levels * @hugetlb_entry: if set, called for each hugetlb entry - * *Caution*: The caller must hold mmap_sem() if @hugetlb_entry - * is used. + * @test_walk: caller specific callback function to determine whether + * we walk over the current vma or not. A positive returned + * value means "do page table walk over the current vma," + * and a negative one means "abort current page table walk + * right now." 0 means "skip the current vma." 
+ * @mm: mm_struct representing the target process of page table walk + * @vma: vma currently walked (NULL if walking outside vmas) + * @private: private data for callbacks' usage * - * (see walk_page_range for more details) + * (see the comment on walk_page_range() for more details) */ struct mm_walk { - int (*pgd_entry)(pgd_t *pgd, unsigned long addr, - unsigned long next, struct mm_walk *walk); - int (*pud_entry)(pud_t *pud, unsigned long addr, - unsigned long next, struct mm_walk *walk); int (*pmd_entry)(pmd_t *pmd, unsigned long addr, unsigned long next, struct mm_walk *walk); int (*pte_entry)(pte_t *pte, unsigned long addr, @@ -1192,12 +1192,16 @@ struct mm_walk { int (*hugetlb_entry)(pte_t *pte, unsigned long hmask, unsigned long addr, unsigned long next, struct mm_walk *walk); + int (*test_walk)(unsigned long addr, unsigned long next, + struct mm_walk *walk); struct mm_struct *mm; + struct vm_area_struct *vma; void *private; }; int walk_page_range(unsigned long addr, unsigned long end, struct mm_walk *walk); +int walk_page_vma(struct vm_area_struct *vma, struct mm_walk *walk); void free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling); int copy_page_range(struct mm_struct *dst, struct mm_struct *src, @@ -1261,6 +1265,17 @@ long get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas); +long get_user_pages_locked(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages, + int *locked); +long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages, + unsigned int gup_flags); +long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, unsigned long nr_pages, + int write, int force, struct page **pages); int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages); struct kvec; @@ -1438,8 +1453,32 @@ static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud, { return 0; } + +static inline unsigned long mm_nr_pmds(struct mm_struct *mm) +{ + return 0; +} + +static inline void mm_inc_nr_pmds(struct mm_struct *mm) {} +static inline void mm_dec_nr_pmds(struct mm_struct *mm) {} + #else int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address); + +static inline unsigned long mm_nr_pmds(struct mm_struct *mm) +{ + return atomic_long_read(&mm->nr_pmds); +} + +static inline void mm_inc_nr_pmds(struct mm_struct *mm) +{ + atomic_long_inc(&mm->nr_pmds); +} + +static inline void mm_dec_nr_pmds(struct mm_struct *mm) +{ + atomic_long_dec(&mm->nr_pmds); +} #endif int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 07c8bd3f7b48..199a03aab8dc 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -28,6 +28,8 @@ struct mem_cgroup; IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK)) #define ALLOC_SPLIT_PTLOCKS (SPINLOCK_SIZE > BITS_PER_LONG/8) +typedef void compound_page_dtor(struct page *); + /* * Each physical page in the system has a struct page associated with * it to keep track of whatever it is we are using the page for at the @@ -142,6 +144,12 @@ struct page { struct rcu_head rcu_head; /* Used by SLAB * when destroying via RCU */ + /* First 
tail page of compound page */ + struct { + compound_page_dtor *compound_dtor; + unsigned long compound_order; + }; + #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS pgtable_t pmd_huge_pte; /* protected by page->ptl */ #endif @@ -355,7 +363,8 @@ struct mm_struct { pgd_t * pgd; atomic_t mm_users; /* How many users with user space? */ atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */ - atomic_long_t nr_ptes; /* Page table pages */ + atomic_long_t nr_ptes; /* PTE page table pages */ + atomic_long_t nr_pmds; /* PMD page table pages */ int map_count; /* number of VMAs */ spinlock_t page_table_lock; /* Protects page tables and some counters */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 2f0856d14b21..f279d9c158cd 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -426,7 +426,7 @@ struct zone { const char *name; /* - * Number of MIGRATE_RESEVE page block. To maintain for just + * Number of MIGRATE_RESERVE page block. To maintain for just * optimization. Protected by zone->lock. */ int nr_migrate_reserve_block; @@ -970,7 +970,6 @@ static inline int zonelist_node_idx(struct zoneref *zoneref) * @z - The cursor used as a starting point for the search * @highest_zoneidx - The zone index of the highest zone to return * @nodes - An optional nodemask to filter the zonelist with - * @zone - The first suitable zone found is returned via this parameter * * This function returns the next zone at or below a given zone index that is * within the allowed nodemask using a cursor as the starting point for the @@ -980,8 +979,7 @@ static inline int zonelist_node_idx(struct zoneref *zoneref) */ struct zoneref *next_zones_zonelist(struct zoneref *z, enum zone_type highest_zoneidx, - nodemask_t *nodes, - struct zone **zone); + nodemask_t *nodes); /** * first_zones_zonelist - Returns the first zone at or below highest_zoneidx within the allowed nodemask in a zonelist @@ -1000,8 +998,10 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, nodemask_t *nodes, struct zone **zone) { - return next_zones_zonelist(zonelist->_zonerefs, highest_zoneidx, nodes, - zone); + struct zoneref *z = next_zones_zonelist(zonelist->_zonerefs, + highest_zoneidx, nodes); + *zone = zonelist_zone(z); + return z; } /** @@ -1018,7 +1018,8 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, #define for_each_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) \ for (z = first_zones_zonelist(zlist, highidx, nodemask, &zone); \ zone; \ - z = next_zones_zonelist(++z, highidx, nodemask, &zone)) \ + z = next_zones_zonelist(++z, highidx, nodemask), \ + zone = zonelist_zone(z)) \ /** * for_each_zone_zonelist - helper macro to iterate over valid zones in a zonelist at or below a given zone index diff --git a/include/linux/oom.h b/include/linux/oom.h index 76200984d1e2..d5771bed59c9 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -47,6 +47,10 @@ static inline bool oom_task_origin(const struct task_struct *p) return !!(p->signal->oom_flags & OOM_FLAG_ORIGIN); } +extern void mark_tsk_oom_victim(struct task_struct *tsk); + +extern void unmark_oom_victim(void); + extern unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, const nodemask_t *nodemask, unsigned long totalpages); @@ -68,22 +72,14 @@ extern enum oom_scan_t oom_scan_process_thread(struct task_struct *task, unsigned long totalpages, const nodemask_t *nodemask, bool force_kill); -extern void out_of_memory(struct 
zonelist *zonelist, gfp_t gfp_mask, +extern bool out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order, nodemask_t *mask, bool force_kill); extern int register_oom_notifier(struct notifier_block *nb); extern int unregister_oom_notifier(struct notifier_block *nb); extern bool oom_killer_disabled; - -static inline void oom_killer_disable(void) -{ - oom_killer_disabled = true; -} - -static inline void oom_killer_enable(void) -{ - oom_killer_disabled = false; -} +extern bool oom_killer_disable(void); +extern void oom_killer_enable(void); extern struct task_struct *find_lock_task_mm(struct task_struct *p); diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h index 955421575d16..17fa4f8de3a6 100644 --- a/include/linux/page_counter.h +++ b/include/linux/page_counter.h @@ -41,7 +41,8 @@ int page_counter_try_charge(struct page_counter *counter, struct page_counter **fail); void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages); int page_counter_limit(struct page_counter *counter, unsigned long limit); -int page_counter_memparse(const char *buf, unsigned long *nr_pages); +int page_counter_memparse(const char *buf, const char *max, + unsigned long *nr_pages); static inline void page_counter_reset_watermark(struct page_counter *counter) { diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h index d2a2c84c72d0..c42981cd99aa 100644 --- a/include/linux/page_ext.h +++ b/include/linux/page_ext.h @@ -40,7 +40,7 @@ struct page_ext { #ifdef CONFIG_PAGE_OWNER unsigned int order; gfp_t gfp_mask; - struct stack_trace trace; + unsigned int nr_entries; unsigned long trace_entries[8]; #endif }; diff --git a/include/linux/swap.h b/include/linux/swap.h index 34e8b60ab973..7067eca501e2 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -437,16 +437,6 @@ extern int reuse_swap_page(struct page *); extern int try_to_free_swap(struct page *); struct backing_dev_info; -#ifdef CONFIG_MEMCG -extern void -mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout); -#else -static inline void -mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout) -{ -} -#endif - #else /* CONFIG_SWAP */ #define swap_address_space(entry) (NULL) @@ -547,11 +537,6 @@ static inline swp_entry_t get_swap_page(void) return entry; } -static inline void -mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) -{ -} - #endif /* CONFIG_SWAP */ #endif /* __KERNEL__*/ #endif /* _LINUX_SWAP_H */ diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 50cbc876be56..831a3168ab35 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -135,6 +135,8 @@ static inline void make_migration_entry_read(swp_entry_t *entry) *entry = swp_entry(SWP_MIGRATION_READ, swp_offset(*entry)); } +extern void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, + spinlock_t *ptl); extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address); extern void migration_entry_wait_huge(struct vm_area_struct *vma, @@ -148,6 +150,8 @@ static inline int is_migration_entry(swp_entry_t swp) } #define migration_entry_to_page(swp) NULL static inline void make_migration_entry_read(swp_entry_t *entryp) { } +static inline void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, + spinlock_t *ptl) { } static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address) { } static inline void migration_entry_wait_huge(struct vm_area_struct *vma, diff --git 
a/include/trace/events/compaction.h b/include/trace/events/compaction.h index c6814b917bdf..9a6a3fe0fb51 100644 --- a/include/trace/events/compaction.h +++ b/include/trace/events/compaction.h @@ -11,39 +11,55 @@ DECLARE_EVENT_CLASS(mm_compaction_isolate_template, - TP_PROTO(unsigned long nr_scanned, + TP_PROTO( + unsigned long start_pfn, + unsigned long end_pfn, + unsigned long nr_scanned, unsigned long nr_taken), - TP_ARGS(nr_scanned, nr_taken), + TP_ARGS(start_pfn, end_pfn, nr_scanned, nr_taken), TP_STRUCT__entry( + __field(unsigned long, start_pfn) + __field(unsigned long, end_pfn) __field(unsigned long, nr_scanned) __field(unsigned long, nr_taken) ), TP_fast_assign( + __entry->start_pfn = start_pfn; + __entry->end_pfn = end_pfn; __entry->nr_scanned = nr_scanned; __entry->nr_taken = nr_taken; ), - TP_printk("nr_scanned=%lu nr_taken=%lu", + TP_printk("range=(0x%lx ~ 0x%lx) nr_scanned=%lu nr_taken=%lu", + __entry->start_pfn, + __entry->end_pfn, __entry->nr_scanned, __entry->nr_taken) ); DEFINE_EVENT(mm_compaction_isolate_template, mm_compaction_isolate_migratepages, - TP_PROTO(unsigned long nr_scanned, + TP_PROTO( + unsigned long start_pfn, + unsigned long end_pfn, + unsigned long nr_scanned, unsigned long nr_taken), - TP_ARGS(nr_scanned, nr_taken) + TP_ARGS(start_pfn, end_pfn, nr_scanned, nr_taken) ); DEFINE_EVENT(mm_compaction_isolate_template, mm_compaction_isolate_freepages, - TP_PROTO(unsigned long nr_scanned, + + TP_PROTO( + unsigned long start_pfn, + unsigned long end_pfn, + unsigned long nr_scanned, unsigned long nr_taken), - TP_ARGS(nr_scanned, nr_taken) + TP_ARGS(start_pfn, end_pfn, nr_scanned, nr_taken) ); TRACE_EVENT(mm_compaction_migratepages, @@ -85,47 +101,198 @@ TRACE_EVENT(mm_compaction_migratepages, ); TRACE_EVENT(mm_compaction_begin, - TP_PROTO(unsigned long zone_start, unsigned long migrate_start, - unsigned long free_start, unsigned long zone_end), + TP_PROTO(unsigned long zone_start, unsigned long migrate_pfn, + unsigned long free_pfn, unsigned long zone_end, bool sync), - TP_ARGS(zone_start, migrate_start, free_start, zone_end), + TP_ARGS(zone_start, migrate_pfn, free_pfn, zone_end, sync), TP_STRUCT__entry( __field(unsigned long, zone_start) - __field(unsigned long, migrate_start) - __field(unsigned long, free_start) + __field(unsigned long, migrate_pfn) + __field(unsigned long, free_pfn) __field(unsigned long, zone_end) + __field(bool, sync) ), TP_fast_assign( __entry->zone_start = zone_start; - __entry->migrate_start = migrate_start; - __entry->free_start = free_start; + __entry->migrate_pfn = migrate_pfn; + __entry->free_pfn = free_pfn; __entry->zone_end = zone_end; + __entry->sync = sync; ), - TP_printk("zone_start=%lu migrate_start=%lu free_start=%lu zone_end=%lu", + TP_printk("zone_start=0x%lx migrate_pfn=0x%lx free_pfn=0x%lx zone_end=0x%lx, mode=%s", __entry->zone_start, - __entry->migrate_start, - __entry->free_start, - __entry->zone_end) + __entry->migrate_pfn, + __entry->free_pfn, + __entry->zone_end, + __entry->sync ? 
"sync" : "async") ); TRACE_EVENT(mm_compaction_end, - TP_PROTO(int status), + TP_PROTO(unsigned long zone_start, unsigned long migrate_pfn, + unsigned long free_pfn, unsigned long zone_end, bool sync, + int status), - TP_ARGS(status), + TP_ARGS(zone_start, migrate_pfn, free_pfn, zone_end, sync, status), TP_STRUCT__entry( + __field(unsigned long, zone_start) + __field(unsigned long, migrate_pfn) + __field(unsigned long, free_pfn) + __field(unsigned long, zone_end) + __field(bool, sync) __field(int, status) ), TP_fast_assign( + __entry->zone_start = zone_start; + __entry->migrate_pfn = migrate_pfn; + __entry->free_pfn = free_pfn; + __entry->zone_end = zone_end; + __entry->sync = sync; __entry->status = status; ), - TP_printk("status=%d", __entry->status) + TP_printk("zone_start=0x%lx migrate_pfn=0x%lx free_pfn=0x%lx zone_end=0x%lx, mode=%s status=%s", + __entry->zone_start, + __entry->migrate_pfn, + __entry->free_pfn, + __entry->zone_end, + __entry->sync ? "sync" : "async", + compaction_status_string[__entry->status]) +); + +TRACE_EVENT(mm_compaction_try_to_compact_pages, + + TP_PROTO( + int order, + gfp_t gfp_mask, + enum migrate_mode mode), + + TP_ARGS(order, gfp_mask, mode), + + TP_STRUCT__entry( + __field(int, order) + __field(gfp_t, gfp_mask) + __field(enum migrate_mode, mode) + ), + + TP_fast_assign( + __entry->order = order; + __entry->gfp_mask = gfp_mask; + __entry->mode = mode; + ), + + TP_printk("order=%d gfp_mask=0x%x mode=%d", + __entry->order, + __entry->gfp_mask, + (int)__entry->mode) +); + +DECLARE_EVENT_CLASS(mm_compaction_suitable_template, + + TP_PROTO(struct zone *zone, + int order, + int ret), + + TP_ARGS(zone, order, ret), + + TP_STRUCT__entry( + __field(int, nid) + __field(char *, name) + __field(int, order) + __field(int, ret) + ), + + TP_fast_assign( + __entry->nid = zone_to_nid(zone); + __entry->name = (char *)zone->name; + __entry->order = order; + __entry->ret = ret; + ), + + TP_printk("node=%d zone=%-8s order=%d ret=%s", + __entry->nid, + __entry->name, + __entry->order, + compaction_status_string[__entry->ret]) +); + +DEFINE_EVENT(mm_compaction_suitable_template, mm_compaction_finished, + + TP_PROTO(struct zone *zone, + int order, + int ret), + + TP_ARGS(zone, order, ret) +); + +DEFINE_EVENT(mm_compaction_suitable_template, mm_compaction_suitable, + + TP_PROTO(struct zone *zone, + int order, + int ret), + + TP_ARGS(zone, order, ret) +); + +#ifdef CONFIG_COMPACTION +DECLARE_EVENT_CLASS(mm_compaction_defer_template, + + TP_PROTO(struct zone *zone, int order), + + TP_ARGS(zone, order), + + TP_STRUCT__entry( + __field(int, nid) + __field(char *, name) + __field(int, order) + __field(unsigned int, considered) + __field(unsigned int, defer_shift) + __field(int, order_failed) + ), + + TP_fast_assign( + __entry->nid = zone_to_nid(zone); + __entry->name = (char *)zone->name; + __entry->order = order; + __entry->considered = zone->compact_considered; + __entry->defer_shift = zone->compact_defer_shift; + __entry->order_failed = zone->compact_order_failed; + ), + + TP_printk("node=%d zone=%-8s order=%d order_failed=%d consider=%u limit=%lu", + __entry->nid, + __entry->name, + __entry->order, + __entry->order_failed, + __entry->considered, + 1UL << __entry->defer_shift) +); + +DEFINE_EVENT(mm_compaction_defer_template, mm_compaction_deferred, + + TP_PROTO(struct zone *zone, int order), + + TP_ARGS(zone, order) +); + +DEFINE_EVENT(mm_compaction_defer_template, mm_compaction_defer_compaction, + + TP_PROTO(struct zone *zone, int order), + + TP_ARGS(zone, order) +); + 
+DEFINE_EVENT(mm_compaction_defer_template, mm_compaction_defer_reset, + + TP_PROTO(struct zone *zone, int order), + + TP_ARGS(zone, order) ); +#endif #endif /* _TRACE_COMPACTION_H */ diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index aece1346ceb7..4ad10baecd4d 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -268,11 +268,11 @@ TRACE_EVENT(mm_page_alloc_extfrag, TP_PROTO(struct page *page, int alloc_order, int fallback_order, - int alloc_migratetype, int fallback_migratetype, int new_migratetype), + int alloc_migratetype, int fallback_migratetype), TP_ARGS(page, alloc_order, fallback_order, - alloc_migratetype, fallback_migratetype, new_migratetype), + alloc_migratetype, fallback_migratetype), TP_STRUCT__entry( __field( struct page *, page ) @@ -289,7 +289,8 @@ TRACE_EVENT(mm_page_alloc_extfrag, __entry->fallback_order = fallback_order; __entry->alloc_migratetype = alloc_migratetype; __entry->fallback_migratetype = fallback_migratetype; - __entry->change_ownership = (new_migratetype == alloc_migratetype); + __entry->change_ownership = (alloc_migratetype == + get_pageblock_migratetype(page)); ), TP_printk("page=%p pfn=%lu alloc_order=%d fallback_order=%d pageblock_order=%d alloc_migratetype=%d fallback_migratetype=%d fragmenting=%d change_ownership=%d", diff --git a/include/uapi/linux/kernel-page-flags.h b/include/uapi/linux/kernel-page-flags.h index 2f96d233c980..a6c4962e5d46 100644 --- a/include/uapi/linux/kernel-page-flags.h +++ b/include/uapi/linux/kernel-page-flags.h @@ -32,6 +32,7 @@ #define KPF_KSM 21 #define KPF_THP 22 #define KPF_BALLOON 23 +#define KPF_ZERO_PAGE 24 #endif /* _UAPILINUX_KERNEL_PAGE_FLAGS_H */ |
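The final hunk exports the zero page to userspace as KPF_ZERO_PAGE (bit 24) in the kernel-page-flags ABI. As a hedged illustration only (this program is not part of the merge), userspace could observe the new bit by reading /proc/kpageflags, which holds one 64-bit flags word per PFN and normally requires root:

```c
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define KPF_ZERO_PAGE 24	/* new bit added by this merge */

int main(int argc, char **argv)
{
	uint64_t pfn, flags;
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <pfn>\n", argv[0]);
		return 1;
	}
	pfn = strtoull(argv[1], NULL, 0);

	fd = open("/proc/kpageflags", O_RDONLY);
	if (fd < 0) {
		perror("open /proc/kpageflags");
		return 1;
	}

	/* One u64 of KPF_* flags per PFN, indexed by PFN. */
	if (pread(fd, &flags, sizeof(flags), pfn * sizeof(flags)) != sizeof(flags)) {
		perror("pread");
		close(fd);
		return 1;
	}
	close(fd);

	printf("pfn %#llx: flags %#llx zero_page=%d\n",
	       (unsigned long long)pfn, (unsigned long long)flags,
	       !!(flags & (1ULL << KPF_ZERO_PAGE)));
	return 0;
}
```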