Diffstat (limited to 'include')
44 files changed, 467 insertions, 207 deletions
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index a8015a7a55bb..53b2acc38213 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -233,6 +233,10 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) # define pte_accessible(mm, pte) ((void)(pte), 1) #endif +#ifndef pte_present_nonuma +#define pte_present_nonuma(pte) pte_present(pte) +#endif + #ifndef flush_tlb_fix_spurious_fault #define flush_tlb_fix_spurious_fault(vma, address) flush_tlb_page(vma, address) #endif @@ -670,7 +674,7 @@ static inline int pmd_trans_unstable(pmd_t *pmd) static inline int pte_numa(pte_t pte) { return (pte_flags(pte) & - (_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA; + (_PAGE_NUMA|_PAGE_PROTNONE|_PAGE_PRESENT)) == _PAGE_NUMA; } #endif @@ -678,7 +682,7 @@ static inline int pte_numa(pte_t pte) static inline int pmd_numa(pmd_t pmd) { return (pmd_flags(pmd) & - (_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA; + (_PAGE_NUMA|_PAGE_PROTNONE|_PAGE_PRESENT)) == _PAGE_NUMA; } #endif diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5c6f836afa1b..3cd426e971db 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1588,6 +1588,7 @@ static inline bool blk_integrity_is_initialized(struct gendisk *g) struct block_device_operations { int (*open) (struct block_device *, fmode_t); void (*release) (struct gendisk *, fmode_t); + int (*rw_page)(struct block_device *, sector_t, struct page *, int rw); int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*direct_access) (struct block_device *, sector_t, @@ -1606,7 +1607,13 @@ struct block_device_operations { extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int, unsigned long); +extern int bdev_read_page(struct block_device *, sector_t, struct page *); +extern int bdev_write_page(struct block_device *, sector_t, struct page *, + struct writeback_control *); #else /* CONFIG_BLOCK */ + +struct block_device; + /* * stubs for when the block layer is configured out */ @@ -1642,6 +1649,12 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk) return false; } +static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, + sector_t *error_sector) +{ + return 0; +} + #endif /* CONFIG_BLOCK */ #endif diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index db51fe4fe317..4e2bd4c95b66 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -58,9 +58,9 @@ extern void free_bootmem_late(unsigned long physaddr, unsigned long size); * Flags for reserve_bootmem (also if CONFIG_HAVE_ARCH_BOOTMEM_NODE, * the architecture-specific code should honor this). * - * If flags is 0, then the return value is always 0 (success). If - * flags contains BOOTMEM_EXCLUSIVE, then -EBUSY is returned if the - * memory already was reserved. + * If flags is BOOTMEM_DEFAULT, then the return value is always 0 (success). + * If flags contains BOOTMEM_EXCLUSIVE, then -EBUSY is returned if the memory + * already was reserved. 
*/ #define BOOTMEM_DEFAULT 0 #define BOOTMEM_EXCLUSIVE (1<<0) diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 7cbf837a279c..324329ceea1e 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -207,8 +207,6 @@ void block_invalidatepage(struct page *page, unsigned int offset, unsigned int length); int block_write_full_page(struct page *page, get_block_t *get_block, struct writeback_control *wbc); -int block_write_full_page_endio(struct page *page, get_block_t *get_block, - struct writeback_control *wbc, bh_end_io_t *handler); int block_read_full_page(struct page*, get_block_t*); int block_is_partially_uptodate(struct page *page, unsigned long from, unsigned long count); diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 7e1c76e3cd68..01e3132820da 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -22,7 +22,7 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write, extern int fragmentation_index(struct zone *zone, unsigned int order); extern unsigned long try_to_compact_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask, - bool sync, bool *contended); + enum migrate_mode mode, bool *contended); extern void compact_pgdat(pg_data_t *pgdat, int order); extern void reset_isolation_suitable(pg_data_t *pgdat); extern unsigned long compaction_suitable(struct zone *zone, int order); @@ -91,7 +91,7 @@ static inline bool compaction_restarting(struct zone *zone, int order) #else static inline unsigned long try_to_compact_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *nodemask, - bool sync, bool *contended) + enum migrate_mode mode, bool *contended) { return COMPACT_CONTINUE; } diff --git a/include/linux/compiler.h b/include/linux/compiler.h index ee7239ea1583..64fdfe1cfcf0 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -323,9 +323,18 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); #endif #ifndef __compiletime_error # define __compiletime_error(message) -# define __compiletime_error_fallback(condition) \ +/* + * Sparse complains of variable sized arrays due to the temporary variable in + * __compiletime_assert. Unfortunately we can't just expand it out to make + * sparse see a constant array size without breaking compiletime_assert on old + * versions of GCC (e.g. 4.2.4), so hide the array from sparse altogether. + */ +# ifndef __CHECKER__ +# define __compiletime_error_fallback(condition) \ do { ((void)sizeof(char[1 - 2 * condition])); } while (0) -#else +# endif +#endif +#ifndef __compiletime_error_fallback # define __compiletime_error_fallback(condition) do { } while (0) #endif diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index b19d3dc2e651..ade2390ffe92 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -12,10 +12,31 @@ #include <linux/cpumask.h> #include <linux/nodemask.h> #include <linux/mm.h> +#include <linux/jump_label.h> #ifdef CONFIG_CPUSETS -extern int number_of_cpusets; /* How many cpusets are defined in system? 
*/ +extern struct static_key cpusets_enabled_key; +static inline bool cpusets_enabled(void) +{ + return static_key_false(&cpusets_enabled_key); +} + +static inline int nr_cpusets(void) +{ + /* jump label reference count + the top-level cpuset */ + return static_key_count(&cpusets_enabled_key) + 1; +} + +static inline void cpuset_inc(void) +{ + static_key_slow_inc(&cpusets_enabled_key); +} + +static inline void cpuset_dec(void) +{ + static_key_slow_dec(&cpusets_enabled_key); +} extern int cpuset_init(void); extern void cpuset_init_smp(void); @@ -32,13 +53,13 @@ extern int __cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask); static inline int cpuset_node_allowed_softwall(int node, gfp_t gfp_mask) { - return number_of_cpusets <= 1 || + return nr_cpusets() <= 1 || __cpuset_node_allowed_softwall(node, gfp_mask); } static inline int cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask) { - return number_of_cpusets <= 1 || + return nr_cpusets() <= 1 || __cpuset_node_allowed_hardwall(node, gfp_mask); } @@ -124,6 +145,8 @@ static inline void set_mems_allowed(nodemask_t nodemask) #else /* !CONFIG_CPUSETS */ +static inline bool cpusets_enabled(void) { return false; } + static inline int cpuset_init(void) { return 0; } static inline void cpuset_init_smp(void) {} diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h index 3b28f937d959..772eab5d524a 100644 --- a/include/linux/dma-contiguous.h +++ b/include/linux/dma-contiguous.h @@ -88,7 +88,8 @@ static inline void dma_contiguous_set_default(struct cma *cma) void dma_contiguous_reserve(phys_addr_t addr_limit); int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, - phys_addr_t limit, struct cma **res_cma); + phys_addr_t limit, struct cma **res_cma, + bool fixed); /** * dma_declare_contiguous() - reserve area for contiguous memory handling @@ -108,7 +109,7 @@ static inline int dma_declare_contiguous(struct device *dev, phys_addr_t size, { struct cma *cma; int ret; - ret = dma_contiguous_reserve_area(size, base, limit, &cma); + ret = dma_contiguous_reserve_area(size, base, limit, &cma, true); if (ret == 0) dev_set_cma_area(dev, cma); @@ -136,7 +137,9 @@ static inline void dma_contiguous_set_default(struct cma *cma) { } static inline void dma_contiguous_reserve(phys_addr_t limit) { } static inline int dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, - phys_addr_t limit, struct cma **res_cma) { + phys_addr_t limit, struct cma **res_cma, + bool fixed) +{ return -ENOSYS; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 878031227c57..c3f46e499dd0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2590,6 +2590,7 @@ extern ssize_t simple_read_from_buffer(void __user *to, size_t count, extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, const void __user *from, size_t count); +extern int __generic_file_fsync(struct file *, loff_t, loff_t, int); extern int generic_file_fsync(struct file *, loff_t, loff_t, int); extern int generic_check_addressable(unsigned, u64); diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 39b81dc7d01a..6eb1fb37de9a 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -6,7 +6,6 @@ #include <linux/stddef.h> #include <linux/linkage.h> #include <linux/topology.h> -#include <linux/mmdebug.h> struct vm_area_struct; @@ -31,7 +30,6 @@ struct vm_area_struct; #define ___GFP_HARDWALL 0x20000u #define ___GFP_THISNODE 0x40000u #define ___GFP_RECLAIMABLE 0x80000u -#define ___GFP_KMEMCG 0x100000u #define ___GFP_NOTRACK 
0x200000u #define ___GFP_NO_KSWAPD 0x400000u #define ___GFP_OTHER_NODE 0x800000u @@ -91,7 +89,6 @@ struct vm_area_struct; #define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD) #define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */ -#define __GFP_KMEMCG ((__force gfp_t)___GFP_KMEMCG) /* Allocation comes from a memcg-accounted resource */ #define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */ /* @@ -353,6 +350,10 @@ extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, #define alloc_page_vma_node(gfp_mask, vma, addr, node) \ alloc_pages_vma(gfp_mask, 0, vma, addr, node) +extern struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order); +extern struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask, + unsigned int order); + extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order); extern unsigned long get_zeroed_page(gfp_t gfp_mask); @@ -369,11 +370,11 @@ void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask); extern void __free_pages(struct page *page, unsigned int order); extern void free_pages(unsigned long addr, unsigned int order); -extern void free_hot_cold_page(struct page *page, int cold); -extern void free_hot_cold_page_list(struct list_head *list, int cold); +extern void free_hot_cold_page(struct page *page, bool cold); +extern void free_hot_cold_page_list(struct list_head *list, bool cold); -extern void __free_memcg_kmem_pages(struct page *page, unsigned int order); -extern void free_memcg_kmem_pages(unsigned long addr, unsigned int order); +extern void __free_kmem_pages(struct page *page, unsigned int order); +extern void free_kmem_pages(unsigned long addr, unsigned int order); #define __free_page(page) __free_pages((page), 0) #define free_page(addr) free_pages((addr), 0) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index b65166de1d9d..255cd5cc0754 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -343,6 +343,11 @@ static inline unsigned huge_page_shift(struct hstate *h) return h->order + PAGE_SHIFT; } +static inline bool hstate_is_gigantic(struct hstate *h) +{ + return huge_page_order(h) >= MAX_ORDER; +} + static inline unsigned int pages_per_huge_page(struct hstate *h) { return 1 << h->order; @@ -392,15 +397,13 @@ static inline pgoff_t basepage_index(struct page *page) extern void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn); -int pmd_huge_support(void); -/* - * Currently hugepage migration is enabled only for pmd-based hugepage. - * This function will be updated when hugepage migration is more widely - * supported. 
- */ -static inline int hugepage_migration_support(struct hstate *h) +static inline int hugepage_migration_supported(struct hstate *h) { - return pmd_huge_support() && (huge_page_shift(h) == PMD_SHIFT); +#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION + return huge_page_shift(h) == PMD_SHIFT; +#else + return 0; +#endif } static inline spinlock_t *huge_pte_lockptr(struct hstate *h, @@ -450,8 +453,7 @@ static inline pgoff_t basepage_index(struct page *page) return page->index; } #define dissolve_free_huge_pages(s, e) do {} while (0) -#define pmd_huge_support() 0 -#define hugepage_migration_support(h) 0 +#define hugepage_migration_supported(h) 0 static inline spinlock_t *huge_pte_lockptr(struct hstate *h, struct mm_struct *mm, pte_t *pte) diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 5c1dfb2a9e73..784304b222b3 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -69,6 +69,10 @@ struct static_key { # include <asm/jump_label.h> # define HAVE_JUMP_LABEL +#else +struct static_key { + atomic_t enabled; +}; #endif /* CC_HAVE_ASM_GOTO && CONFIG_JUMP_LABEL */ enum jump_label_type { @@ -79,6 +83,12 @@ enum jump_label_type { struct module; #include <linux/atomic.h> + +static inline int static_key_count(struct static_key *key) +{ + return atomic_read(&key->enabled); +} + #ifdef HAVE_JUMP_LABEL #define JUMP_LABEL_TYPE_FALSE_BRANCH 0UL @@ -134,10 +144,6 @@ extern void jump_label_apply_nops(struct module *mod); #else /* !HAVE_JUMP_LABEL */ -struct static_key { - atomic_t enabled; -}; - static __always_inline void jump_label_init(void) { static_key_initialized = true; @@ -145,14 +151,14 @@ static __always_inline void jump_label_init(void) static __always_inline bool static_key_false(struct static_key *key) { - if (unlikely(atomic_read(&key->enabled) > 0)) + if (unlikely(static_key_count(key) > 0)) return true; return false; } static __always_inline bool static_key_true(struct static_key *key) { - if (likely(atomic_read(&key->enabled) > 0)) + if (likely(static_key_count(key) > 0)) return true; return false; } @@ -194,7 +200,7 @@ static inline int jump_label_apply_nops(struct module *mod) static inline bool static_key_enabled(struct static_key *key) { - return (atomic_read(&key->enabled) > 0); + return static_key_count(key) > 0; } #endif /* _LINUX_JUMP_LABEL_H */ diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 73dc382e72d8..b660e05b63d4 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -272,6 +272,8 @@ static inline bool memblock_bottom_up(void) { return false; } #define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0) #define MEMBLOCK_ALLOC_ACCESSIBLE 0 +phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align, + phys_addr_t start, phys_addr_t end); phys_addr_t memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys_addr_t max_addr); phys_addr_t __memblock_alloc_base(phys_addr_t size, phys_addr_t align, diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index b569b8be5c5a..eb65d29516ca 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -492,13 +492,9 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order); int memcg_cache_id(struct mem_cgroup *memcg); -char *memcg_create_cache_name(struct mem_cgroup *memcg, - struct kmem_cache *root_cache); int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s, struct kmem_cache *root_cache); void memcg_free_cache_params(struct kmem_cache *s); -void memcg_register_cache(struct kmem_cache 
*s); -void memcg_unregister_cache(struct kmem_cache *s); int memcg_update_cache_size(struct kmem_cache *s, int num_groups); void memcg_update_array_size(int num_groups); @@ -506,8 +502,10 @@ void memcg_update_array_size(int num_groups); struct kmem_cache * __memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp); -void mem_cgroup_destroy_cache(struct kmem_cache *cachep); -int __kmem_cache_destroy_memcg_children(struct kmem_cache *s); +int __memcg_charge_slab(struct kmem_cache *cachep, gfp_t gfp, int order); +void __memcg_uncharge_slab(struct kmem_cache *cachep, int order); + +int __memcg_cleanup_cache_params(struct kmem_cache *s); /** * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed. @@ -534,7 +532,7 @@ memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order) * res_counter_charge_nofail, but we hope those allocations are rare, * and won't be worth the trouble. */ - if (!(gfp & __GFP_KMEMCG) || (gfp & __GFP_NOFAIL)) + if (gfp & __GFP_NOFAIL) return true; if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD)) return true; @@ -583,17 +581,7 @@ memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order) * @cachep: the original global kmem cache * @gfp: allocation flags. * - * This function assumes that the task allocating, which determines the memcg - * in the page allocator, belongs to the same cgroup throughout the whole - * process. Misacounting can happen if the task calls memcg_kmem_get_cache() - * while belonging to a cgroup, and later on changes. This is considered - * acceptable, and should only happen upon task migration. - * - * Before the cache is created by the memcg core, there is also a possible - * imbalance: the task belongs to a memcg, but the cache being allocated from - * is the global cache, since the child cache is not yet guaranteed to be - * ready. This case is also fine, since in this case the GFP_KMEMCG will not be - * passed and the page allocator will not attempt any cgroup accounting. + * All memory allocated from a per-memcg cache is charged to the owner memcg. */ static __always_inline struct kmem_cache * memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp) @@ -648,14 +636,6 @@ static inline void memcg_free_cache_params(struct kmem_cache *s) { } -static inline void memcg_register_cache(struct kmem_cache *s) -{ -} - -static inline void memcg_unregister_cache(struct kmem_cache *s) -{ -} - static inline struct kmem_cache * memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp) { diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 4ca3d951fe91..010d125bffbf 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -187,14 +187,8 @@ extern void put_page_bootmem(struct page *page); extern void get_page_bootmem(unsigned long ingo, struct page *page, unsigned long type); -/* - * Lock for memory hotplug guarantees 1) all callbacks for memory hotplug - * notifier will be called under this. 2) offline/online/add/remove memory - * will not run simultaneously. - */ - -void lock_memory_hotplug(void); -void unlock_memory_hotplug(void); +void get_online_mems(void); +void put_online_mems(void); #else /* ! CONFIG_MEMORY_HOTPLUG */ /* @@ -232,8 +226,8 @@ static inline int try_online_node(int nid) return 0; } -static inline void lock_memory_hotplug(void) {} -static inline void unlock_memory_hotplug(void) {} +static inline void get_online_mems(void) {} +static inline void put_online_mems(void) {} #endif /* ! 
CONFIG_MEMORY_HOTPLUG */ diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 3c1b968da0ca..f230a978e6ba 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -175,6 +175,12 @@ static inline int vma_migratable(struct vm_area_struct *vma) { if (vma->vm_flags & (VM_IO | VM_PFNMAP)) return 0; + +#ifndef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION + if (vma->vm_flags & VM_HUGETLB) + return 0; +#endif + /* * Migration allocates pages in the highest zone. If we cannot * do so then migration (at least from node to node) is not diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 84a31ad0b791..a2901c414664 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -5,7 +5,9 @@ #include <linux/mempolicy.h> #include <linux/migrate_mode.h> -typedef struct page *new_page_t(struct page *, unsigned long private, int **); +typedef struct page *new_page_t(struct page *page, unsigned long private, + int **reason); +typedef void free_page_t(struct page *page, unsigned long private); /* * Return values from addresss_space_operations.migratepage(): @@ -38,7 +40,7 @@ enum migrate_reason { extern void putback_movable_pages(struct list_head *l); extern int migrate_page(struct address_space *, struct page *, struct page *, enum migrate_mode); -extern int migrate_pages(struct list_head *l, new_page_t x, +extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, unsigned long private, enum migrate_mode mode, int reason); extern int migrate_prep(void); @@ -56,8 +58,9 @@ extern int migrate_page_move_mapping(struct address_space *mapping, #else static inline void putback_movable_pages(struct list_head *l) {} -static inline int migrate_pages(struct list_head *l, new_page_t x, - unsigned long private, enum migrate_mode mode, int reason) +static inline int migrate_pages(struct list_head *l, new_page_t new, + free_page_t free, unsigned long private, enum migrate_mode mode, + int reason) { return -ENOSYS; } static inline int migrate_prep(void) { return -ENOSYS; } diff --git a/include/linux/mm.h b/include/linux/mm.h index d6777060449f..368600628d14 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -407,20 +407,25 @@ static inline void compound_unlock_irqrestore(struct page *page, #endif } +static inline struct page *compound_head_by_tail(struct page *tail) +{ + struct page *head = tail->first_page; + + /* + * page->first_page may be a dangling pointer to an old + * compound page, so recheck that it is still a tail + * page before returning. + */ + smp_rmb(); + if (likely(PageTail(tail))) + return head; + return tail; +} + static inline struct page *compound_head(struct page *page) { - if (unlikely(PageTail(page))) { - struct page *head = page->first_page; - - /* - * page->first_page may be a dangling pointer to an old - * compound page, so recheck that it is still a tail - * page before returning. - */ - smp_rmb(); - if (likely(PageTail(page))) - return head; - } + if (unlikely(PageTail(page))) + return compound_head_by_tail(page); return page; } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 8967e20cbe57..de1627232af0 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -406,7 +406,7 @@ struct mm_struct { spinlock_t ioctx_lock; struct kioctx_table __rcu *ioctx_table; #endif -#ifdef CONFIG_MM_OWNER +#ifdef CONFIG_MEMCG /* * "owner" points to a task that is regarded as the canonical * user/owner of this mm. 
All of the following must be true in diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index 2d57efa64cc1..edd82a105220 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -1,6 +1,8 @@ #ifndef LINUX_MM_DEBUG_H #define LINUX_MM_DEBUG_H 1 +#include <linux/stringify.h> + struct page; extern void dump_page(struct page *page, const char *reason); @@ -9,11 +11,20 @@ extern void dump_page_badflags(struct page *page, const char *reason, #ifdef CONFIG_DEBUG_VM #define VM_BUG_ON(cond) BUG_ON(cond) -#define VM_BUG_ON_PAGE(cond, page) \ - do { if (unlikely(cond)) { dump_page(page, NULL); BUG(); } } while (0) +#define VM_BUG_ON_PAGE(cond, page) \ + do { \ + if (unlikely(cond)) { \ + dump_page(page, "VM_BUG_ON_PAGE(" __stringify(cond)")");\ + BUG(); \ + } \ + } while (0) +#define VM_WARN_ON(cond) WARN_ON(cond) +#define VM_WARN_ON_ONCE(cond) WARN_ON_ONCE(cond) #else #define VM_BUG_ON(cond) BUILD_BUG_ON_INVALID(cond) #define VM_BUG_ON_PAGE(cond, page) VM_BUG_ON(cond) +#define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond) +#define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond) #endif #ifdef CONFIG_DEBUG_VIRTUAL diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index fac5509c18f0..6cbd1b6c3d20 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -75,9 +75,18 @@ enum { extern int page_group_by_mobility_disabled; -static inline int get_pageblock_migratetype(struct page *page) +#define NR_MIGRATETYPE_BITS (PB_migrate_end - PB_migrate + 1) +#define MIGRATETYPE_MASK ((1UL << NR_MIGRATETYPE_BITS) - 1) + +#define get_pageblock_migratetype(page) \ + get_pfnblock_flags_mask(page, page_to_pfn(page), \ + PB_migrate_end, MIGRATETYPE_MASK) + +static inline int get_pfnblock_migratetype(struct page *page, unsigned long pfn) { - return get_pageblock_flags_group(page, PB_migrate, PB_migrate_end); + BUILD_BUG_ON(PB_migrate_end - PB_migrate != 2); + return get_pfnblock_flags_mask(page, pfn, PB_migrate_end, + MIGRATETYPE_MASK); } struct free_area { @@ -360,9 +369,10 @@ struct zone { /* Set to true when the PG_migrate_skip bits should be cleared */ bool compact_blockskip_flush; - /* pfns where compaction scanners should start */ + /* pfn where compaction free scanner should start */ unsigned long compact_cached_free_pfn; - unsigned long compact_cached_migrate_pfn; + /* pfn where async and sync compaction migration scanner should start */ + unsigned long compact_cached_migrate_pfn[2]; #endif #ifdef CONFIG_MEMORY_HOTPLUG /* see spanned/present_pages for more description */ @@ -481,9 +491,8 @@ struct zone { * give them a chance of being in the same cacheline. * * Write access to present_pages at runtime should be protected by - * lock_memory_hotplug()/unlock_memory_hotplug(). Any reader who can't - * tolerant drift of present_pages should hold memory hotplug lock to - * get a stable value. + * mem_hotplug_begin/end(). Any reader who can't tolerant drift of + * present_pages should get_online_mems() to get a stable value. * * Read access to managed_pages should be safe because it's unsigned * long. 
Write access to zone->managed_pages and totalram_pages are @@ -763,10 +772,10 @@ typedef struct pglist_data { unsigned long node_spanned_pages; /* total size of physical page range, including holes */ int node_id; - nodemask_t reclaim_nodes; /* Nodes allowed to reclaim from */ wait_queue_head_t kswapd_wait; wait_queue_head_t pfmemalloc_wait; - struct task_struct *kswapd; /* Protected by lock_memory_hotplug() */ + struct task_struct *kswapd; /* Protected by + mem_hotplug_begin/end() */ int kswapd_max_order; enum zone_type classzone_idx; #ifdef CONFIG_NUMA_BALANCING @@ -808,10 +817,10 @@ static inline bool pgdat_is_empty(pg_data_t *pgdat) extern struct mutex zonelists_mutex; void build_all_zonelists(pg_data_t *pgdat, struct zone *zone); void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx); -bool zone_watermark_ok(struct zone *z, int order, unsigned long mark, - int classzone_idx, int alloc_flags); -bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark, - int classzone_idx, int alloc_flags); +bool zone_watermark_ok(struct zone *z, unsigned int order, + unsigned long mark, int classzone_idx, int alloc_flags); +bool zone_watermark_ok_safe(struct zone *z, unsigned int order, + unsigned long mark, int classzone_idx, int alloc_flags); enum memmap_context { MEMMAP_EARLY, MEMMAP_HOTPLUG, diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index d1fe1a761047..2093eb72785e 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -198,6 +198,7 @@ struct page; /* forward declaration */ TESTPAGEFLAG(Locked, locked) PAGEFLAG(Error, error) TESTCLEARFLAG(Error, error) PAGEFLAG(Referenced, referenced) TESTCLEARFLAG(Referenced, referenced) + __SETPAGEFLAG(Referenced, referenced) PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty) PAGEFLAG(LRU, lru) __CLEARPAGEFLAG(LRU, lru) PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active) @@ -208,6 +209,7 @@ PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned) /* Xen */ PAGEFLAG(SavePinned, savepinned); /* Xen */ PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved) PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked) + __SETPAGEFLAG(SwapBacked, swapbacked) __PAGEFLAG(SlobFree, slob_free) diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index 2ee8cd2466b5..2baeee12f48e 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -30,9 +30,12 @@ enum pageblock_bits { PB_migrate, PB_migrate_end = PB_migrate + 3 - 1, /* 3 bits required for migrate types */ -#ifdef CONFIG_COMPACTION PB_migrate_skip,/* If set the block is skipped by compaction */ -#endif /* CONFIG_COMPACTION */ + + /* + * Assume the bits will always align on a word. If this assumption + * changes then get/set pageblock needs updating. + */ NR_PAGEBLOCK_BITS }; @@ -62,11 +65,26 @@ extern int pageblock_order; /* Forward declaration */ struct page; +unsigned long get_pfnblock_flags_mask(struct page *page, + unsigned long pfn, + unsigned long end_bitidx, + unsigned long mask); + +void set_pfnblock_flags_mask(struct page *page, + unsigned long flags, + unsigned long pfn, + unsigned long end_bitidx, + unsigned long mask); + /* Declarations for getting and setting flags. 
See mm/page_alloc.c */ -unsigned long get_pageblock_flags_group(struct page *page, - int start_bitidx, int end_bitidx); -void set_pageblock_flags_group(struct page *page, unsigned long flags, - int start_bitidx, int end_bitidx); +#define get_pageblock_flags_group(page, start_bitidx, end_bitidx) \ + get_pfnblock_flags_mask(page, page_to_pfn(page), \ + end_bitidx, \ + (1 << (end_bitidx - start_bitidx + 1)) - 1) +#define set_pageblock_flags_group(page, flags, start_bitidx, end_bitidx) \ + set_pfnblock_flags_mask(page, flags, page_to_pfn(page), \ + end_bitidx, \ + (1 << (end_bitidx - start_bitidx + 1)) - 1) #ifdef CONFIG_COMPACTION #define get_pageblock_skip(page) \ diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 45598f1e9aa3..0a97b583ee8d 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -110,7 +110,7 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) #define page_cache_get(page) get_page(page) #define page_cache_release(page) put_page(page) -void release_pages(struct page **pages, int nr, int cold); +void release_pages(struct page **pages, int nr, bool cold); /* * speculatively take a reference to a page. @@ -259,12 +259,109 @@ pgoff_t page_cache_next_hole(struct address_space *mapping, pgoff_t page_cache_prev_hole(struct address_space *mapping, pgoff_t index, unsigned long max_scan); +#define FGP_ACCESSED 0x00000001 +#define FGP_LOCK 0x00000002 +#define FGP_CREAT 0x00000004 +#define FGP_WRITE 0x00000008 +#define FGP_NOFS 0x00000010 +#define FGP_NOWAIT 0x00000020 + +struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset, + int fgp_flags, gfp_t cache_gfp_mask, gfp_t radix_gfp_mask); + +/** + * find_get_page - find and get a page reference + * @mapping: the address_space to search + * @offset: the page index + * + * Looks up the page cache slot at @mapping & @offset. If there is a + * page cache page, it is returned with an increased refcount. + * + * Otherwise, %NULL is returned. + */ +static inline struct page *find_get_page(struct address_space *mapping, + pgoff_t offset) +{ + return pagecache_get_page(mapping, offset, 0, 0, 0); +} + +static inline struct page *find_get_page_flags(struct address_space *mapping, + pgoff_t offset, int fgp_flags) +{ + return pagecache_get_page(mapping, offset, fgp_flags, 0, 0); +} + +/** + * find_lock_page - locate, pin and lock a pagecache page + * pagecache_get_page - find and get a page reference + * @mapping: the address_space to search + * @offset: the page index + * + * Looks up the page cache slot at @mapping & @offset. If there is a + * page cache page, it is returned locked and with an increased + * refcount. + * + * Otherwise, %NULL is returned. + * + * find_lock_page() may sleep. + */ +static inline struct page *find_lock_page(struct address_space *mapping, + pgoff_t offset) +{ + return pagecache_get_page(mapping, offset, FGP_LOCK, 0, 0); +} + +/** + * find_or_create_page - locate or add a pagecache page + * @mapping: the page's address_space + * @index: the page's index into the mapping + * @gfp_mask: page allocation mode + * + * Looks up the page cache slot at @mapping & @offset. If there is a + * page cache page, it is returned locked and with an increased + * refcount. + * + * If the page is not present, a new page is allocated using @gfp_mask + * and added to the page cache and the VM's LRU list. The page is + * returned locked and with an increased refcount. + * + * On memory exhaustion, %NULL is returned. 
+ * + * find_or_create_page() may sleep, even if @gfp_flags specifies an + * atomic allocation! + */ +static inline struct page *find_or_create_page(struct address_space *mapping, + pgoff_t offset, gfp_t gfp_mask) +{ + return pagecache_get_page(mapping, offset, + FGP_LOCK|FGP_ACCESSED|FGP_CREAT, + gfp_mask, gfp_mask & GFP_RECLAIM_MASK); +} + +/** + * grab_cache_page_nowait - returns locked page at given index in given cache + * @mapping: target address_space + * @index: the page index + * + * Same as grab_cache_page(), but do not wait if the page is unavailable. + * This is intended for speculative data generators, where the data can + * be regenerated if the page couldn't be grabbed. This routine should + * be safe to call while holding the lock for another page. + * + * Clear __GFP_FS when allocating the page to avoid recursion into the fs + * and deadlock against the caller's locked page. + */ +static inline struct page *grab_cache_page_nowait(struct address_space *mapping, + pgoff_t index) +{ + return pagecache_get_page(mapping, index, + FGP_LOCK|FGP_CREAT|FGP_NOFS|FGP_NOWAIT, + mapping_gfp_mask(mapping), + GFP_NOFS); +} + struct page *find_get_entry(struct address_space *mapping, pgoff_t offset); -struct page *find_get_page(struct address_space *mapping, pgoff_t offset); struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset); -struct page *find_lock_page(struct address_space *mapping, pgoff_t offset); -struct page *find_or_create_page(struct address_space *mapping, pgoff_t index, - gfp_t gfp_mask); unsigned find_get_entries(struct address_space *mapping, pgoff_t start, unsigned int nr_entries, struct page **entries, pgoff_t *indices); @@ -287,8 +384,6 @@ static inline struct page *grab_cache_page(struct address_space *mapping, return find_or_create_page(mapping, index, mapping_gfp_mask(mapping)); } -extern struct page * grab_cache_page_nowait(struct address_space *mapping, - pgoff_t index); extern struct page * read_cache_page(struct address_space *mapping, pgoff_t index, filler_t *filler, void *data); extern struct page * read_cache_page_gfp(struct address_space *mapping, @@ -425,6 +520,8 @@ static inline void wait_on_page_writeback(struct page *page) extern void end_page_writeback(struct page *page); void wait_for_stable_page(struct page *page); +void page_endio(struct page *page, int rw, int err); + /* * Add an arbitrary waiter to a page's wait queue */ diff --git a/include/linux/plist.h b/include/linux/plist.h index aa0fb390bd29..8b6c970cff6c 100644 --- a/include/linux/plist.h +++ b/include/linux/plist.h @@ -98,6 +98,13 @@ struct plist_node { } /** + * PLIST_HEAD - declare and init plist_head + * @head: name for struct plist_head variable + */ +#define PLIST_HEAD(head) \ + struct plist_head head = PLIST_HEAD_INIT(head) + +/** * PLIST_NODE_INIT - static struct plist_node initializer * @node: struct plist_node variable name * @__prio: initial node priority @@ -134,6 +141,8 @@ static inline void plist_node_init(struct plist_node *node, int prio) extern void plist_add(struct plist_node *node, struct plist_head *head); extern void plist_del(struct plist_node *node, struct plist_head *head); +extern void plist_requeue(struct plist_node *node, struct plist_head *head); + /** * plist_for_each - iterate over the plist * @pos: the type * to use as a loop counter @@ -143,6 +152,16 @@ extern void plist_del(struct plist_node *node, struct plist_head *head); list_for_each_entry(pos, &(head)->node_list, node_list) /** + * plist_for_each_continue - continue iteration over the 
plist + * @pos: the type * to use as a loop cursor + * @head: the head for your list + * + * Continue to iterate over plist, continuing after the current position. + */ +#define plist_for_each_continue(pos, head) \ + list_for_each_entry_continue(pos, &(head)->node_list, node_list) + +/** * plist_for_each_safe - iterate safely over a plist of given type * @pos: the type * to use as a loop counter * @n: another type * to use as temporary storage @@ -163,6 +182,18 @@ extern void plist_del(struct plist_node *node, struct plist_head *head); list_for_each_entry(pos, &(head)->node_list, mem.node_list) /** + * plist_for_each_entry_continue - continue iteration over list of given type + * @pos: the type * to use as a loop cursor + * @head: the head for your list + * @m: the name of the list_struct within the struct + * + * Continue to iterate over list of given type, continuing after + * the current position. + */ +#define plist_for_each_entry_continue(pos, head, m) \ + list_for_each_entry_continue(pos, &(head)->node_list, m.node_list) + +/** * plist_for_each_entry_safe - iterate safely over list of given type * @pos: the type * to use as a loop counter * @n: another type * to use as temporary storage @@ -229,6 +260,20 @@ static inline int plist_node_empty(const struct plist_node *node) #endif /** + * plist_next - get the next entry in list + * @pos: the type * to cursor + */ +#define plist_next(pos) \ + list_next_entry(pos, node_list) + +/** + * plist_prev - get the prev entry in list + * @pos: the type * to cursor + */ +#define plist_prev(pos) \ + list_prev_entry(pos, node_list) + +/** * plist_first - return the first node (and thus, highest priority) * @head: the &struct plist_head pointer * diff --git a/include/linux/printk.h b/include/linux/printk.h index 8752f7595b27..319ff7e53efb 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -30,6 +30,17 @@ static inline const char *printk_skip_level(const char *buffer) return buffer; } +/* printk's without a loglevel use this.. */ +#define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL + +/* We show everything that is MORE important than this.. */ +#define CONSOLE_LOGLEVEL_SILENT 0 /* Mum's the word */ +#define CONSOLE_LOGLEVEL_MIN 1 /* Minimum loglevel we let people use */ +#define CONSOLE_LOGLEVEL_QUIET 4 /* Shhh ..., when booted with "quiet" */ +#define CONSOLE_LOGLEVEL_DEFAULT 7 /* anything MORE serious than KERN_DEBUG */ +#define CONSOLE_LOGLEVEL_DEBUG 10 /* issue debug messages */ +#define CONSOLE_LOGLEVEL_MOTORMOUTH 15 /* You can't shut this one up */ + extern int console_printk[]; #define console_loglevel (console_printk[0]) @@ -39,13 +50,13 @@ extern int console_printk[]; static inline void console_silent(void) { - console_loglevel = 0; + console_loglevel = CONSOLE_LOGLEVEL_SILENT; } static inline void console_verbose(void) { if (console_loglevel) - console_loglevel = 15; + console_loglevel = CONSOLE_LOGLEVEL_MOTORMOUTH; } struct va_format { @@ -128,9 +139,9 @@ asmlinkage __printf(1, 2) __cold int printk(const char *fmt, ...); /* - * Special printk facility for scheduler use only, _DO_NOT_USE_ ! + * Special printk facility for scheduler/timekeeping use only, _DO_NOT_USE_ ! */ -__printf(1, 2) __cold int printk_sched(const char *fmt, ...); +__printf(1, 2) __cold int printk_deferred(const char *fmt, ...); /* * Please don't use printk_ratelimit(), because it shares ratelimiting state @@ -165,7 +176,7 @@ int printk(const char *s, ...) return 0; } static inline __printf(1, 2) __cold -int printk_sched(const char *s, ...) 
+int printk_deferred(const char *s, ...) { return 0; } @@ -210,6 +221,12 @@ extern asmlinkage void dump_stack(void) __cold; #define pr_fmt(fmt) fmt #endif +/* + * These can be used to print at the various log levels. + * All of these will print unconditionally, although note that pr_debug() + * and other debug macros are compiled out unless either DEBUG is defined + * or CONFIG_DYNAMIC_DEBUG is set. + */ #define pr_emerg(fmt, ...) \ printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) #define pr_alert(fmt, ...) \ @@ -266,9 +283,20 @@ extern asmlinkage void dump_stack(void) __cold; printk(fmt, ##__VA_ARGS__); \ } \ }) +#define printk_deferred_once(fmt, ...) \ +({ \ + static bool __print_once __read_mostly; \ + \ + if (!__print_once) { \ + __print_once = true; \ + printk_deferred(fmt, ##__VA_ARGS__); \ + } \ +}) #else #define printk_once(fmt, ...) \ no_printk(fmt, ##__VA_ARGS__) +#define printk_deferred_once(fmt, ...) \ + no_printk(fmt, ##__VA_ARGS__) #endif #define pr_emerg_once(fmt, ...) \ diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 608e60a74c3c..9d117f61d976 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -44,6 +44,10 @@ extern int remove_proc_subtree(const char *, struct proc_dir_entry *); #else /* CONFIG_PROC_FS */ +static inline void proc_root_init(void) +{ +} + static inline void proc_flush_task(struct task_struct *task) { } diff --git a/include/linux/rmap.h b/include/linux/rmap.h index b66c2110cb1f..be574506e6a9 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -72,10 +72,9 @@ struct anon_vma_chain { }; enum ttu_flags { - TTU_UNMAP = 0, /* unmap mode */ - TTU_MIGRATION = 1, /* migration mode */ - TTU_MUNLOCK = 2, /* munlock mode */ - TTU_ACTION_MASK = 0xff, + TTU_UNMAP = 1, /* unmap mode */ + TTU_MIGRATION = 2, /* migration mode */ + TTU_MUNLOCK = 4, /* munlock mode */ TTU_IGNORE_MLOCK = (1 << 8), /* ignore mlock */ TTU_IGNORE_ACCESS = (1 << 9), /* don't age */ @@ -183,14 +182,10 @@ static inline void page_dup_rmap(struct page *page) */ int page_referenced(struct page *, int is_locked, struct mem_cgroup *memcg, unsigned long *vm_flags); -int page_referenced_one(struct page *, struct vm_area_struct *, - unsigned long address, void *arg); #define TTU_ACTION(x) ((x) & TTU_ACTION_MASK) int try_to_unmap(struct page *, enum ttu_flags flags); -int try_to_unmap_one(struct page *, struct vm_area_struct *, - unsigned long address, void *arg); /* * Called from mm/filemap_xip.c to unmap empty zero page diff --git a/include/linux/sched.h b/include/linux/sched.h index 70f67e4e6156..8fcd0e6098d9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -137,12 +137,6 @@ struct filename; #define VMACACHE_MASK (VMACACHE_SIZE - 1) /* - * List of flags we want to share for kernel threads, - * if only because they are not used by them anyway. - */ -#define CLONE_KERNEL (CLONE_FS | CLONE_FILES | CLONE_SIGHAND) - -/* * These are the constant used to fake the fixed-point load-average * counting. Some notes: * - 11 bit fractions expand to 22 bits by the multiplies: this gives @@ -745,7 +739,6 @@ static inline int signal_group_exit(const struct signal_struct *sig) struct user_struct { atomic_t __count; /* reference count */ atomic_t processes; /* How many processes does this user have? */ - atomic_t files; /* How many open files does this user have? */ atomic_t sigpending; /* How many pending signals does this user have? */ #ifdef CONFIG_INOTIFY_USER atomic_t inotify_watches; /* How many inotify watches does this user have? 
*/ @@ -2967,7 +2960,7 @@ static inline void inc_syscw(struct task_struct *tsk) #define TASK_SIZE_OF(tsk) TASK_SIZE #endif -#ifdef CONFIG_MM_OWNER +#ifdef CONFIG_MEMCG extern void mm_update_next_owner(struct mm_struct *mm); extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p); #else @@ -2978,7 +2971,7 @@ static inline void mm_update_next_owner(struct mm_struct *mm) static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p) { } -#endif /* CONFIG_MM_OWNER */ +#endif /* CONFIG_MEMCG */ static inline unsigned long task_rlimit(const struct task_struct *tsk, unsigned int limit) diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 8045a554cafb..596a0e007c62 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -25,6 +25,10 @@ enum { sysctl_hung_task_timeout_secs = 0 }; * Because the kernel adds some informative sections to a image of program at * generating coredump, we need some margin. The number of extra sections is * 1-3 now and depends on arch. We use "5" as safe margin, here. + * + * ELF extended numbering allows more than 65535 sections, so 16-bit bound is + * not a hard limit any more. Although some userspace tools can be surprised by + * that. */ #define MAPCOUNT_ELF_CORE_MARGIN (5) #define DEFAULT_MAX_MAP_COUNT (USHRT_MAX - MAPCOUNT_ELF_CORE_MARGIN) diff --git a/include/linux/slab.h b/include/linux/slab.h index 307bfbe62387..1d9abb7d22a0 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -116,7 +116,9 @@ struct kmem_cache *kmem_cache_create(const char *, size_t, size_t, unsigned long, void (*)(void *)); #ifdef CONFIG_MEMCG_KMEM -void kmem_cache_create_memcg(struct mem_cgroup *, struct kmem_cache *); +struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *, + struct kmem_cache *, + const char *); #endif void kmem_cache_destroy(struct kmem_cache *); int kmem_cache_shrink(struct kmem_cache *); @@ -369,16 +371,7 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s, #include <linux/slub_def.h> #endif -static __always_inline void * -kmalloc_order(size_t size, gfp_t flags, unsigned int order) -{ - void *ret; - - flags |= (__GFP_COMP | __GFP_KMEMCG); - ret = (void *) __get_free_pages(flags, order); - kmemleak_alloc(ret, size, 1, flags); - return ret; -} +extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order); #ifdef CONFIG_TRACING extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order); @@ -533,10 +526,7 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) * @memcg: pointer to the memcg this cache belongs to * @list: list_head for the list of all caches in this memcg * @root_cache: pointer to the global, root cache, this cache was derived from - * @dead: set to true after the memcg dies; the cache may still be around. * @nr_pages: number of pages that belongs to this cache. - * @destroy: worker to be called whenever we are ready, or believe we may be - * ready, to destroy this cache. 
*/ struct memcg_cache_params { bool is_root_cache; @@ -549,9 +539,7 @@ struct memcg_cache_params { struct mem_cgroup *memcg; struct list_head list; struct kmem_cache *root_cache; - bool dead; atomic_t nr_pages; - struct work_struct destroy; }; }; }; diff --git a/include/linux/swap.h b/include/linux/swap.h index 350711560753..4bdbee80eede 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -166,10 +166,10 @@ enum { #define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX /* - * Ratio between the present memory in the zone and the "gap" that - * we're allowing kswapd to shrink in addition to the per-zone high - * wmark, even for zones that already have the high wmark satisfied, - * in order to provide better per-zone lru behavior. We are ok to + * Ratio between zone->managed_pages and the "gap" that above the per-zone + * "high_wmark". While balancing nodes, We allow kswapd to shrink zones that + * do not meet the (high_wmark + gap) watermark, even which already met the + * high_wmark, in order to provide better per-zone lru behavior. We are ok to * spend not more than 1% of the memory for this zone balancing "gap". */ #define KSWAPD_ZONE_BALANCE_GAP_RATIO 100 @@ -214,8 +214,9 @@ struct percpu_cluster { struct swap_info_struct { unsigned long flags; /* SWP_USED etc: see above */ signed short prio; /* swap priority of this type */ + struct plist_node list; /* entry in swap_active_head */ + struct plist_node avail_list; /* entry in swap_avail_head */ signed char type; /* strange name for an index */ - signed char next; /* next type on the swap list */ unsigned int max; /* extent of the swap_map */ unsigned char *swap_map; /* vmalloc'ed array of usage counts */ struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */ @@ -255,11 +256,6 @@ struct swap_info_struct { struct swap_cluster_info discard_cluster_tail; /* list tail of discard clusters */ }; -struct swap_list_t { - int head; /* head of priority-ordered swapfile list */ - int next; /* swapfile to be used next */ -}; - /* linux/mm/workingset.c */ void *workingset_eviction(struct address_space *mapping, struct page *page); bool workingset_refault(void *shadow); @@ -308,12 +304,14 @@ extern unsigned long nr_free_pagecache_pages(void); /* linux/mm/swap.c */ -extern void __lru_cache_add(struct page *); extern void lru_cache_add(struct page *); +extern void lru_cache_add_anon(struct page *page); +extern void lru_cache_add_file(struct page *page); extern void lru_add_page_tail(struct page *page, struct page *page_tail, struct lruvec *lruvec, struct list_head *head); extern void activate_page(struct page *); extern void mark_page_accessed(struct page *); +extern void init_page_accessed(struct page *page); extern void lru_add_drain(void); extern void lru_add_drain_cpu(int cpu); extern void lru_add_drain_all(void); @@ -323,22 +321,6 @@ extern void swap_setup(void); extern void add_page_to_unevictable_list(struct page *page); -/** - * lru_cache_add: add a page to the page lists - * @page: the page to add - */ -static inline void lru_cache_add_anon(struct page *page) -{ - ClearPageActive(page); - __lru_cache_add(page); -} - -static inline void lru_cache_add_file(struct page *page) -{ - ClearPageActive(page); - __lru_cache_add(page); -} - /* linux/mm/vmscan.c */ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask); @@ -496,7 +478,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout) #define free_page_and_swap_cache(page) \ page_cache_release(page) 
#define free_pages_and_swap_cache(pages, nr) \ - release_pages((pages), (nr), 0); + release_pages((pages), (nr), false); static inline void show_swap_cache_info(void) { diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h index e282624e8c10..388293a91e8c 100644 --- a/include/linux/swapfile.h +++ b/include/linux/swapfile.h @@ -6,7 +6,7 @@ * want to expose them to the dozens of source files that include swap.h */ extern spinlock_t swap_lock; -extern struct swap_list_t swap_list; +extern struct plist_head swap_active_head; extern struct swap_info_struct *swap_info[]; extern int try_to_unuse(unsigned int, bool, unsigned long); diff --git a/include/linux/swapops.h b/include/linux/swapops.h index c0f75261a728..6adfb7bfbf44 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -54,7 +54,7 @@ static inline pgoff_t swp_offset(swp_entry_t entry) /* check whether a pte points to a swap entry */ static inline int is_swap_pte(pte_t pte) { - return !pte_none(pte) && !pte_present(pte) && !pte_file(pte); + return !pte_none(pte) && !pte_present_nonuma(pte) && !pte_file(pte); } #endif diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index a5ffd32642fd..e7a018eaf3a2 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -116,4 +116,6 @@ static inline void swiotlb_free(void) { } #endif extern void swiotlb_print_info(void); +extern int is_swiotlb_buffer(phys_addr_t paddr); + #endif /* __LINUX_SWIOTLB_H */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a4a0588c5397..b0881a0ed322 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -711,7 +711,7 @@ asmlinkage long sys_keyctl(int cmd, unsigned long arg2, unsigned long arg3, asmlinkage long sys_ioprio_set(int which, int who, int ioprio); asmlinkage long sys_ioprio_get(int which, int who); -asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask, +asmlinkage long sys_set_mempolicy(int mode, const unsigned long __user *nmask, unsigned long maxnode); asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, const unsigned long __user *from, @@ -723,7 +723,7 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, int flags); asmlinkage long sys_mbind(unsigned long start, unsigned long len, unsigned long mode, - unsigned long __user *nmask, + const unsigned long __user *nmask, unsigned long maxnode, unsigned flags); asmlinkage long sys_get_mempolicy(int __user *policy, diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index cb0cec94fda3..ff307b548ed3 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -61,8 +61,6 @@ extern long do_no_restart_syscall(struct restart_block *parm); # define THREADINFO_GFP (GFP_KERNEL | __GFP_NOTRACK) #endif -#define THREADINFO_GFP_ACCOUNTED (THREADINFO_GFP | __GFP_KMEMCG) - /* * flag set/clear/test wrappers * - pass TIF_xxxx constants to these functions diff --git a/include/linux/topology.h b/include/linux/topology.h index 973671ff9e7d..dda6ee521e74 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -58,7 +58,8 @@ int arch_update_cpu_topology(void); /* * If the distance between nodes in a system is larger than RECLAIM_DISTANCE * (in whatever arch specific measurement units returned by node_distance()) - * then switch on zone reclaim on boot. + * and zone_reclaim_mode is enabled then the VM will only call zone_reclaim() + * on nodes within this distance. 
*/ #define RECLAIM_DISTANCE 30 #endif diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 486c3972c0be..ced92345c963 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -80,6 +80,10 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, NR_TLB_LOCAL_FLUSH_ALL, NR_TLB_LOCAL_FLUSH_ONE, #endif /* CONFIG_DEBUG_TLBFLUSH */ +#ifdef CONFIG_DEBUG_VM_VMACACHE + VMACACHE_FIND_CALLS, + VMACACHE_FIND_HITS, +#endif NR_VM_EVENT_ITEMS }; diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 45c9cd1daf7a..82e7db7f7100 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -95,6 +95,12 @@ static inline void vm_events_fold_cpu(int cpu) #define count_vm_tlb_events(x, y) do { (void)(y); } while (0) #endif +#ifdef CONFIG_DEBUG_VM_VMACACHE +#define count_vm_vmacache_event(x) count_vm_event(x) +#else +#define count_vm_vmacache_event(x) do {} while (0) +#endif + #define __count_zone_vm_events(item, zone, delta) \ __count_vm_events(item##_NORMAL - ZONE_NORMAL + \ zone_idx(zone), delta) diff --git a/include/linux/zbud.h b/include/linux/zbud.h index 2571a5cfa5fc..13af0d450bf6 100644 --- a/include/linux/zbud.h +++ b/include/linux/zbud.h @@ -11,7 +11,7 @@ struct zbud_ops { struct zbud_pool *zbud_create_pool(gfp_t gfp, struct zbud_ops *ops); void zbud_destroy_pool(struct zbud_pool *pool); -int zbud_alloc(struct zbud_pool *pool, int size, gfp_t gfp, +int zbud_alloc(struct zbud_pool *pool, unsigned int size, gfp_t gfp, unsigned long *handle); void zbud_free(struct zbud_pool *pool, unsigned long handle); int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries); diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h index 06f544ef2f6f..c6814b917bdf 100644 --- a/include/trace/events/compaction.h +++ b/include/trace/events/compaction.h @@ -5,6 +5,7 @@ #define _TRACE_COMPACTION_H #include <linux/types.h> +#include <linux/list.h> #include <linux/tracepoint.h> #include <trace/events/gfpflags.h> @@ -47,10 +48,11 @@ DEFINE_EVENT(mm_compaction_isolate_template, mm_compaction_isolate_freepages, TRACE_EVENT(mm_compaction_migratepages, - TP_PROTO(unsigned long nr_migrated, - unsigned long nr_failed), + TP_PROTO(unsigned long nr_all, + int migrate_rc, + struct list_head *migratepages), - TP_ARGS(nr_migrated, nr_failed), + TP_ARGS(nr_all, migrate_rc, migratepages), TP_STRUCT__entry( __field(unsigned long, nr_migrated) @@ -58,7 +60,22 @@ TRACE_EVENT(mm_compaction_migratepages, ), TP_fast_assign( - __entry->nr_migrated = nr_migrated; + unsigned long nr_failed = 0; + struct list_head *page_lru; + + /* + * migrate_pages() returns either a non-negative number + * with the number of pages that failed migration, or an + * error code, in which case we need to count the remaining + * pages manually + */ + if (migrate_rc >= 0) + nr_failed = migrate_rc; + else + list_for_each(page_lru, migratepages) + nr_failed++; + + __entry->nr_migrated = nr_all - nr_failed; __entry->nr_failed = nr_failed; ), diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h index 1eddbf1557f2..d6fd8e5b14b7 100644 --- a/include/trace/events/gfpflags.h +++ b/include/trace/events/gfpflags.h @@ -34,7 +34,6 @@ {(unsigned long)__GFP_HARDWALL, "GFP_HARDWALL"}, \ {(unsigned long)__GFP_THISNODE, "GFP_THISNODE"}, \ {(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \ - {(unsigned long)__GFP_KMEMCG, "GFP_KMEMCG"}, \ {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"}, \ {(unsigned long)__GFP_NOTRACK, "GFP_NOTRACK"}, \ {(unsigned 
long)__GFP_NO_KSWAPD, "GFP_NO_KSWAPD"}, \ diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h index 132a985aba8b..69590b6ffc09 100644 --- a/include/trace/events/vmscan.h +++ b/include/trace/events/vmscan.h @@ -191,6 +191,7 @@ TRACE_EVENT(mm_shrink_slab_start, TP_STRUCT__entry( __field(struct shrinker *, shr) __field(void *, shrink) + __field(int, nid) __field(long, nr_objects_to_shrink) __field(gfp_t, gfp_flags) __field(unsigned long, pgs_scanned) @@ -203,6 +204,7 @@ TRACE_EVENT(mm_shrink_slab_start, TP_fast_assign( __entry->shr = shr; __entry->shrink = shr->scan_objects; + __entry->nid = sc->nid; __entry->nr_objects_to_shrink = nr_objects_to_shrink; __entry->gfp_flags = sc->gfp_mask; __entry->pgs_scanned = pgs_scanned; @@ -212,9 +214,10 @@ TRACE_EVENT(mm_shrink_slab_start, __entry->total_scan = total_scan; ), - TP_printk("%pF %p: objects to shrink %ld gfp_flags %s pgs_scanned %ld lru_pgs %ld cache items %ld delta %lld total_scan %ld", + TP_printk("%pF %p: nid: %d objects to shrink %ld gfp_flags %s pgs_scanned %ld lru_pgs %ld cache items %ld delta %lld total_scan %ld", __entry->shrink, __entry->shr, + __entry->nid, __entry->nr_objects_to_shrink, show_gfp_flags(__entry->gfp_flags), __entry->pgs_scanned, @@ -225,13 +228,15 @@ TRACE_EVENT(mm_shrink_slab_start, ); TRACE_EVENT(mm_shrink_slab_end, - TP_PROTO(struct shrinker *shr, int shrinker_retval, - long unused_scan_cnt, long new_scan_cnt), + TP_PROTO(struct shrinker *shr, int nid, int shrinker_retval, + long unused_scan_cnt, long new_scan_cnt, long total_scan), - TP_ARGS(shr, shrinker_retval, unused_scan_cnt, new_scan_cnt), + TP_ARGS(shr, nid, shrinker_retval, unused_scan_cnt, new_scan_cnt, + total_scan), TP_STRUCT__entry( __field(struct shrinker *, shr) + __field(int, nid) __field(void *, shrink) __field(long, unused_scan) __field(long, new_scan) @@ -241,16 +246,18 @@ TRACE_EVENT(mm_shrink_slab_end, TP_fast_assign( __entry->shr = shr; + __entry->nid = nid; __entry->shrink = shr->scan_objects; __entry->unused_scan = unused_scan_cnt; __entry->new_scan = new_scan_cnt; __entry->retval = shrinker_retval; - __entry->total_scan = new_scan_cnt - unused_scan_cnt; + __entry->total_scan = total_scan; ), - TP_printk("%pF %p: unused scan count %ld new scan count %ld total_scan %ld last shrinker return val %d", + TP_printk("%pF %p: nid: %d unused scan count %ld new scan count %ld total_scan %ld last shrinker return val %d", __entry->shrink, __entry->shr, + __entry->nid, __entry->unused_scan, __entry->new_scan, __entry->total_scan, |
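
The pagemap.h hunk above replaces the exported find_get_page()/find_lock_page()/find_or_create_page() functions with static inline wrappers around a single pagecache_get_page() entry point driven by FGP_* flags. A minimal sketch of a caller built on that interface follows; my_lock_existing_page() is a hypothetical name, not part of the patch:

/*
 * Illustration only: a lookup helper built on the new pagecache_get_page()
 * interface shown above.
 */
#include <linux/pagemap.h>

static struct page *my_lock_existing_page(struct address_space *mapping,
					   pgoff_t index)
{
	/*
	 * Like find_lock_page(), but additionally marks the page accessed
	 * (FGP_ACCESSED) so the caller can skip a later mark_page_accessed().
	 * Returns NULL if nothing is cached at @index; adding FGP_CREAT and
	 * suitable gfp masks would allocate and insert a page instead.
	 */
	return pagecache_get_page(mapping, index, FGP_LOCK | FGP_ACCESSED,
				  0, 0);
}

As with the old find_lock_page(), the page comes back locked with an elevated refcount, so the caller is expected to unlock_page() and page_cache_release() it when done.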
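
The migrate.h change above adds a free_page_t callback so migrate_pages() can hand back target pages it allocated but never consumed. A sketch of a caller under the new signature; my_alloc_target(), my_free_target() and my_migrate_list() are hypothetical names used only for illustration:

#include <linux/gfp.h>
#include <linux/migrate.h>

/* Allocate a target page on the node passed via @private. */
static struct page *my_alloc_target(struct page *page, unsigned long private,
				    int **result)
{
	return alloc_pages_node((int)private, GFP_HIGHUSER_MOVABLE, 0);
}

/* Called for target pages that ended up unused; release them. */
static void my_free_target(struct page *page, unsigned long private)
{
	__free_page(page);
}

static int my_migrate_list(struct list_head *pages, int target_nid)
{
	return migrate_pages(pages, my_alloc_target, my_free_target,
			     (unsigned long)target_nid, MIGRATE_SYNC,
			     MR_SYSCALL);
}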
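
The blkdev.h hunk above also grows a ->rw_page() hook plus bdev_read_page()/bdev_write_page(), and pagemap.h exports page_endio() for drivers that complete such transfers synchronously. A rough sketch of how a simple RAM-backed driver might wire this up; struct mydev, mydev_rw_page() and the flat backing buffer are assumptions for illustration, not part of the patch:

#include <linux/blkdev.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/string.h>

/* Hypothetical device: one flat, preallocated buffer backing all sectors. */
struct mydev {
	char *data;			/* nr_sectors << 9 bytes */
};

static int mydev_rw_page(struct block_device *bdev, sector_t sector,
			 struct page *page, int rw)
{
	struct mydev *dev = bdev->bd_disk->private_data;
	char *disk = dev->data + ((size_t)sector << 9);	/* toy-sized device */
	void *mem = kmap_atomic(page);

	if (rw & WRITE)
		memcpy(disk, mem, PAGE_SIZE);
	else
		memcpy(mem, disk, PAGE_SIZE);
	kunmap_atomic(mem);

	/* Synchronous completion: unlock the page or end its writeback. */
	page_endio(page, rw & WRITE, 0);
	return 0;
}

static const struct block_device_operations mydev_fops = {
	.owner	 = THIS_MODULE,
	.rw_page = mydev_rw_page,
};

With ->rw_page in place, the new bdev_read_page()/bdev_write_page() helpers let callers such as the swap and mpage paths submit page-sized I/O to this device without allocating a bio.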