Merge branch 'akpm' (second patch-bomb from Andrew)

Merge second patchbomb from Andrew Morton: - the rest of MM - misc fs fixes - add execveat() syscall - new ratelimit feature for fault-injection - decompressor updates - ipc/ updates - fallocate feature creep - fsnotify cleanups - a few other misc things * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (99 commits) cgroups: Documentation: fix trivial typos and wrong paragraph numberings parisc: percpu: update comments referring to __get_cpu_var percpu: update local_ops.txt to reflect this_cpu operations percpu: remove __get_cpu_var and __raw_get_cpu_var macros fsnotify: remove destroy_list from fsnotify_mark fsnotify: unify inode and mount marks handling fallocate: create FAN_MODIFY and IN_MODIFY events mm/cma: make kmemleak ignore CMA regions slub: fix cpuset check in get_any_partial slab: fix cpuset check in fallback_alloc shmdt: use i_size_read() instead of ->i_size ipc/shm.c: fix overly aggressive shmdt() when calls span multiple segments ipc/msg: increase MSGMNI, remove scaling ipc/sem.c: increase SEMMSL, SEMMNI, SEMOPM ipc/sem.c: change memory barrier in sem_lock() to smp_rmb() lib/decompress.c: consistency of compress formats for kernel image decompress_bunzip2: off by one in get_next_block() usr/Kconfig: make initrd compression algorithm selection not expert fault-inject: add ratelimit option ratelimit: add initialization macro ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2014-12-14 00:00:36 +0300
committer: Linus Torvalds <torvalds@linux-foundation.org> 2014-12-14 00:00:36 +0300
commit: 78a45c6f067824cf5d0a9fedea7339ac2e28603c (patch)
tree: b4f78c8b6b9059ddace0a18c11629b8d2045f793 /include
parent: f96fe225677b3efb74346ebd56fafe3997b02afa (diff)
parent: 29d293b6007b91a4463f05bc8d0b26e0e65c5816 (diff)
download: linux-78a45c6f067824cf5d0a9fedea7339ac2e28603c.tar.xz
30 files changed, 356 insertions, 135 deletions
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 61f29e5ea840..576e4639ca60 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -53,6 +53,10 @@ struct linux_binprm {
 #define BINPRM_FLAGS_EXECFD_BIT 1
 #define BINPRM_FLAGS_EXECFD (1 << BINPRM_FLAGS_EXECFD_BIT)
 
+/* filename of the binary will be inaccessible after exec */
+#define BINPRM_FLAGS_PATH_INACCESSIBLE_BIT 2
+#define BINPRM_FLAGS_PATH_INACCESSIBLE (1 << BINPRM_FLAGS_PATH_INACCESSIBLE_BIT)
+
 /* Function parameter for binfmt->coredump */
 struct coredump_params {
 	const siginfo_t *siginfo;
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index e1c8d080c427..34e020c23644 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -45,6 +45,7 @@
  * bitmap_set(dst, pos, nbits)			Set specified bit area
  * bitmap_clear(dst, pos, nbits)		Clear specified bit area
  * bitmap_find_next_zero_area(buf, len, pos, n, mask)	Find bit free area
+ * bitmap_find_next_zero_area_off(buf, len, pos, n, mask)	as above
  * bitmap_shift_right(dst, src, n, nbits)	*dst = *src >> n
  * bitmap_shift_left(dst, src, n, nbits)	*dst = *src << n
  * bitmap_remap(dst, src, old, new, nbits)	*dst = map(old, new)(src)
@@ -114,11 +115,36 @@ extern int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits);
 
 extern void bitmap_set(unsigned long *map, unsigned int start, int len);
 extern void bitmap_clear(unsigned long *map, unsigned int start, int len);
-extern unsigned long bitmap_find_next_zero_area(unsigned long *map,
-					 unsigned long size,
-					 unsigned long start,
-					 unsigned int nr,
-					 unsigned long align_mask);
+
+extern unsigned long bitmap_find_next_zero_area_off(unsigned long *map,
+						    unsigned long size,
+						    unsigned long start,
+						    unsigned int nr,
+						    unsigned long align_mask,
+						    unsigned long align_offset);
+
+/**
+ * bitmap_find_next_zero_area - find a contiguous aligned zero area
+ * @map: The address to base the search on
+ * @size: The bitmap size in bits
+ * @start: The bitnumber to start searching at
+ * @nr: The number of zeroed bits we're looking for
+ * @align_mask: Alignment mask for zero area
+ *
+ * The @align_mask should be one less than a power of 2; the effect is that
+ * the bit offset of all zero areas this function finds is multiples of that
+ * power of 2. A @align_mask of 0 means no alignment is required.
+ */
+static inline unsigned long
+bitmap_find_next_zero_area(unsigned long *map,
+			   unsigned long size,
+			   unsigned long start,
+			   unsigned int nr,
+			   unsigned long align_mask)
+{
+	return bitmap_find_next_zero_area_off(map, size, start, nr,
+					      align_mask, 0);
+}
 
 extern int bitmap_scnprintf(char *buf, unsigned int len,
 			const unsigned long *src, int nbits);
diff --git a/include/linux/compat.h b/include/linux/compat.h
index e6494261eaff..7450ca2ac1fc 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -357,6 +357,9 @@ asmlinkage long compat_sys_lseek(unsigned int, compat_off_t, unsigned int);
 
 asmlinkage long compat_sys_execve(const char __user *filename, const compat_uptr_t __user *argv,
 		     const compat_uptr_t __user *envp);
+asmlinkage long compat_sys_execveat(int dfd, const char __user *filename,
+		     const compat_uptr_t __user *argv,
+		     const compat_uptr_t __user *envp, int flags);
 
 asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
 		compat_ulong_t __user *outp, compat_ulong_t __user *exp,
diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
index c6f996f2abb6..798fad9e420d 100644
--- a/include/linux/fault-inject.h
+++ b/include/linux/fault-inject.h
@@ -5,6 +5,7 @@
 
 #include <linux/types.h>
 #include <linux/debugfs.h>
+#include <linux/ratelimit.h>
 #include <linux/atomic.h>
 
 /*
@@ -25,14 +26,18 @@ struct fault_attr {
 	unsigned long reject_end;
 
 	unsigned long count;
+	struct ratelimit_state ratelimit_state;
+	struct dentry *dname;
 };
 
-#define FAULT_ATTR_INITIALIZER {				\
-		.interval = 1,					\
-		.times = ATOMIC_INIT(1),			\
-		.require_end = ULONG_MAX,			\
-		.stacktrace_depth = 32,				\
-		.verbose = 2,					\
+#define FAULT_ATTR_INITIALIZER {					\
+		.interval = 1,						\
+		.times = ATOMIC_INIT(1),				\
+		.require_end = ULONG_MAX,				\
+		.stacktrace_depth = 32,					\
+		.ratelimit_state = RATELIMIT_STATE_INIT_DISABLED,	\
+		.verbose = 2,						\
+		.dname = NULL,						\
 	}
 
 #define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER
diff --git a/include/linux/fs.h b/include/linux/fs.h
index bb29b02d9bb6..4193a0bd99b0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -18,6 +18,7 @@
 #include <linux/pid.h>
 #include <linux/bug.h>
 #include <linux/mutex.h>
+#include <linux/rwsem.h>
 #include <linux/capability.h>
 #include <linux/semaphore.h>
 #include <linux/fiemap.h>
@@ -401,7 +402,7 @@ struct address_space {
 	atomic_t		i_mmap_writable;/* count VM_SHARED mappings */
 	struct rb_root		i_mmap;		/* tree of private and shared mappings */
 	struct list_head	i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
-	struct mutex		i_mmap_mutex;	/* protect tree, count, list */
+	struct rw_semaphore	i_mmap_rwsem;	/* protect tree, count, list */
 	/* Protected by tree_lock together with the radix tree */
 	unsigned long		nrpages;	/* number of total pages */
 	unsigned long		nrshadows;	/* number of shadow entries */
@@ -467,6 +468,26 @@ struct block_device {
 
 int mapping_tagged(struct address_space *mapping, int tag);
 
+static inline void i_mmap_lock_write(struct address_space *mapping)
+{
+	down_write(&mapping->i_mmap_rwsem);
+}
+
+static inline void i_mmap_unlock_write(struct address_space *mapping)
+{
+	up_write(&mapping->i_mmap_rwsem);
+}
+
+static inline void i_mmap_lock_read(struct address_space *mapping)
+{
+	down_read(&mapping->i_mmap_rwsem);
+}
+
+static inline void i_mmap_unlock_read(struct address_space *mapping)
+{
+	up_read(&mapping->i_mmap_rwsem);
+}
+
 /*
  * Might pages of this file be mapped into userspace?
  */
@@ -2075,6 +2096,7 @@ extern int vfs_open(const struct path *, struct file *, const struct cred *);
 extern struct file * dentry_open(const struct path *, int, const struct cred *);
 extern int filp_close(struct file *, fl_owner_t id);
 
+extern struct filename *getname_flags(const char __user *, int, int *);
 extern struct filename *getname(const char __user *);
 extern struct filename *getname_kernel(const char *);
 
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index ca060d7c4fa6..0f313f93c586 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -197,24 +197,6 @@ struct fsnotify_group {
 #define FSNOTIFY_EVENT_INODE	2
 
 /*
- * Inode specific fields in an fsnotify_mark
- */
-struct fsnotify_inode_mark {
-	struct inode *inode;		/* inode this mark is associated with */
-	struct hlist_node i_list;	/* list of marks by inode->i_fsnotify_marks */
-	struct list_head free_i_list;	/* tmp list used when freeing this mark */
-};
-
-/*
- * Mount point specific fields in an fsnotify_mark
- */
-struct fsnotify_vfsmount_mark {
-	struct vfsmount *mnt;		/* vfsmount this mark is associated with */
-	struct hlist_node m_list;	/* list of marks by inode->i_fsnotify_marks */
-	struct list_head free_m_list;	/* tmp list used when freeing this mark */
-};
-
-/*
  * a mark is simply an object attached to an in core inode which allows an
  * fsnotify listener to indicate they are either no longer interested in events
  * of a type matching mask or only interested in those events.
@@ -230,11 +212,17 @@ struct fsnotify_mark {
 	 * in kernel that found and may be using this mark. */
 	atomic_t refcnt;		/* active things looking at this mark */
 	struct fsnotify_group *group;	/* group this mark is for */
-	struct list_head g_list;	/* list of marks by group->i_fsnotify_marks */
+	struct list_head g_list;	/* list of marks by group->i_fsnotify_marks
+					 * Also reused for queueing mark into
+					 * destroy_list when it's waiting for
+					 * the end of SRCU period before it can
+					 * be freed */
 	spinlock_t lock;		/* protect group and inode */
+	struct hlist_node obj_list;	/* list of marks for inode / vfsmount */
+	struct list_head free_list;	/* tmp list used when freeing this mark */
 	union {
-		struct fsnotify_inode_mark i;
-		struct fsnotify_vfsmount_mark m;
+		struct inode *inode;	/* inode this mark is associated with */
+		struct vfsmount *mnt;	/* vfsmount this mark is associated with */
 	};
 	__u32 ignored_mask;		/* events types to ignore */
 #define FSNOTIFY_MARK_FLAG_INODE		0x01
@@ -243,7 +231,6 @@ struct fsnotify_mark {
 #define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY	0x08
 #define FSNOTIFY_MARK_FLAG_ALIVE		0x10
 	unsigned int flags;		/* vfsmount or inode mark? */
-	struct list_head destroy_list;
 	void (*free_mark)(struct fsnotify_mark *mark); /* called on final put+free */
 };
 
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 07d2699cdb51..b840e3b2770d 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -110,11 +110,8 @@ struct vm_area_struct;
 #define GFP_TEMPORARY	(__GFP_WAIT | __GFP_IO | __GFP_FS | \
 			 __GFP_RECLAIMABLE)
 #define GFP_USER	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
-#define GFP_HIGHUSER	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \
-			 __GFP_HIGHMEM)
-#define GFP_HIGHUSER_MOVABLE	(__GFP_WAIT | __GFP_IO | __GFP_FS | \
-				 __GFP_HARDWALL | __GFP_HIGHMEM | \
-				 __GFP_MOVABLE)
+#define GFP_HIGHUSER	(GFP_USER | __GFP_HIGHMEM)
+#define GFP_HIGHUSER_MOVABLE	(GFP_HIGHUSER | __GFP_MOVABLE)
 #define GFP_IOFS	(__GFP_IO | __GFP_FS)
 #define GFP_TRANSHUGE	(GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
 			 __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 35e7eca4e33b..e365d5ec69cb 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -7,15 +7,6 @@
 #include <linux/notifier.h>
 #include <linux/nsproxy.h>
 
-/*
- * ipc namespace events
- */
-#define IPCNS_MEMCHANGED   0x00000001   /* Notify lowmem size changed */
-#define IPCNS_CREATED  0x00000002   /* Notify new ipc namespace created */
-#define IPCNS_REMOVED  0x00000003   /* Notify ipc namespace removed */
-
-#define IPCNS_CALLBACK_PRI 0
-
 struct user_namespace;
 
 struct ipc_ids {
@@ -38,7 +29,6 @@ struct ipc_namespace {
 	unsigned int	msg_ctlmni;
 	atomic_t	msg_bytes;
 	atomic_t	msg_hdrs;
-	int		auto_msgmni;
 
 	size_t		shm_ctlmax;
 	size_t		shm_ctlall;
@@ -77,18 +67,8 @@ extern atomic_t nr_ipc_ns;
 extern spinlock_t mq_lock;
 
 #ifdef CONFIG_SYSVIPC
-extern int register_ipcns_notifier(struct ipc_namespace *);
-extern int cond_register_ipcns_notifier(struct ipc_namespace *);
-extern void unregister_ipcns_notifier(struct ipc_namespace *);
-extern int ipcns_notify(unsigned long);
 extern void shm_destroy_orphaned(struct ipc_namespace *ns);
 #else /* CONFIG_SYSVIPC */
-static inline int register_ipcns_notifier(struct ipc_namespace *ns)
-{ return 0; }
-static inline int cond_register_ipcns_notifier(struct ipc_namespace *ns)
-{ return 0; }
-static inline void unregister_ipcns_notifier(struct ipc_namespace *ns) { }
-static inline int ipcns_notify(unsigned long l) { return 0; }
 static inline void shm_destroy_orphaned(struct ipc_namespace *ns) {}
 #endif /* CONFIG_SYSVIPC */
 
diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h
index 057e95971014..e705467ddb47 100644
--- a/include/linux/kmemleak.h
+++ b/include/linux/kmemleak.h
@@ -21,6 +21,8 @@
 #ifndef __KMEMLEAK_H
 #define __KMEMLEAK_H
 
+#include <linux/slab.h>
+
 #ifdef CONFIG_DEBUG_KMEMLEAK
 
 extern void kmemleak_init(void) __ref;
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 6ea9f919e888..7c95af8d552c 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -400,8 +400,8 @@ int memcg_cache_id(struct mem_cgroup *memcg);
 
 void memcg_update_array_size(int num_groups);
 
-struct kmem_cache *
-__memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);
+struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep);
+void __memcg_kmem_put_cache(struct kmem_cache *cachep);
 
 int __memcg_charge_slab(struct kmem_cache *cachep, gfp_t gfp, int order);
 void __memcg_uncharge_slab(struct kmem_cache *cachep, int order);
@@ -492,7 +492,13 @@ memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
 	if (unlikely(fatal_signal_pending(current)))
 		return cachep;
 
-	return __memcg_kmem_get_cache(cachep, gfp);
+	return __memcg_kmem_get_cache(cachep);
+}
+
+static __always_inline void memcg_kmem_put_cache(struct kmem_cache *cachep)
+{
+	if (memcg_kmem_enabled())
+		__memcg_kmem_put_cache(cachep);
 }
 #else
 #define for_each_memcg_cache_index(_idx)	\
@@ -528,6 +534,10 @@ memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
 {
 	return cachep;
 }
+
+static inline void memcg_kmem_put_cache(struct kmem_cache *cachep)
+{
+}
 #endif /* CONFIG_MEMCG_KMEM */
 #endif /* _LINUX_MEMCONTROL_H */
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3b337efbe533..c0a67b894c4c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -19,6 +19,7 @@
 #include <linux/bit_spinlock.h>
 #include <linux/shrinker.h>
 #include <linux/resource.h>
+#include <linux/page_ext.h>
 
 struct mempolicy;
 struct anon_vma;
@@ -2060,7 +2061,22 @@ static inline void vm_stat_account(struct mm_struct *mm,
 #endif /* CONFIG_PROC_FS */
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
-extern void kernel_map_pages(struct page *page, int numpages, int enable);
+extern bool _debug_pagealloc_enabled;
+extern void __kernel_map_pages(struct page *page, int numpages, int enable);
+
+static inline bool debug_pagealloc_enabled(void)
+{
+	return _debug_pagealloc_enabled;
+}
+
+static inline void
+kernel_map_pages(struct page *page, int numpages, int enable)
+{
+	if (!debug_pagealloc_enabled())
+		return;
+
+	__kernel_map_pages(page, numpages, enable);
+}
 #ifdef CONFIG_HIBERNATION
 extern bool kernel_page_present(struct page *page);
 #endif /* CONFIG_HIBERNATION */
@@ -2094,9 +2110,9 @@ int drop_caches_sysctl_handler(struct ctl_table *, int,
 					void __user *, size_t *, loff_t *);
 #endif
 
-unsigned long shrink_slab(struct shrink_control *shrink,
-			  unsigned long nr_pages_scanned,
-			  unsigned long lru_pages);
+unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid,
+				unsigned long nr_scanned,
+				unsigned long nr_eligible);
 
 #ifndef CONFIG_MMU
 #define randomize_va_space 0
@@ -2155,20 +2171,36 @@ extern void copy_user_huge_page(struct page *dst, struct page *src,
 				unsigned int pages_per_huge_page);
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
 
+extern struct page_ext_operations debug_guardpage_ops;
+extern struct page_ext_operations page_poisoning_ops;
+
 #ifdef CONFIG_DEBUG_PAGEALLOC
 extern unsigned int _debug_guardpage_minorder;
+extern bool _debug_guardpage_enabled;
 
 static inline unsigned int debug_guardpage_minorder(void)
 {
 	return _debug_guardpage_minorder;
 }
 
+static inline bool debug_guardpage_enabled(void)
+{
+	return _debug_guardpage_enabled;
+}
+
 static inline bool page_is_guard(struct page *page)
 {
-	return test_bit(PAGE_DEBUG_FLAG_GUARD, &page->debug_flags);
+	struct page_ext *page_ext;
+
+	if (!debug_guardpage_enabled())
+		return false;
+
+	page_ext = lookup_page_ext(page);
+	return test_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
 }
 #else
 static inline unsigned int debug_guardpage_minorder(void) { return 0; }
+static inline bool debug_guardpage_enabled(void) { return false; }
 static inline bool page_is_guard(struct page *page) { return false; }
 #endif /* CONFIG_DEBUG_PAGEALLOC */
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index bf9f57529dcf..6d34aa266a8c 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -10,7 +10,6 @@
 #include <linux/rwsem.h>
 #include <linux/completion.h>
 #include <linux/cpumask.h>
-#include <linux/page-debug-flags.h>
 #include <linux/uprobes.h>
 #include <linux/page-flags-layout.h>
 #include <asm/page.h>
@@ -186,9 +185,6 @@ struct page {
 	void *virtual;			/* Kernel virtual address (NULL if
 					   not kmapped, ie. highmem) */
 #endif /* WANT_PAGE_VIRTUAL */
-#ifdef CONFIG_WANT_PAGE_DEBUG_FLAGS
-	unsigned long debug_flags;	/* Use atomic bitops on this */
-#endif
 
 #ifdef CONFIG_KMEMCHECK
 	/*
@@ -534,4 +530,12 @@ enum tlb_flush_reason {
 	NR_TLB_FLUSH_REASONS,
 };
 
+ /*
+  * A swap entry has to fit into a "unsigned long", as the entry is hidden
+  * in the "index" field of the swapper address space.
+  */
+typedef struct {
+	unsigned long val;
+} swp_entry_t;
+
 #endif /* _LINUX_MM_TYPES_H */
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 88787bb4b3b9..ab8564b03468 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -154,7 +154,7 @@ struct mmu_notifier_ops {
  * Therefore notifier chains can only be traversed when either
  *
  * 1. mmap_sem is held.
- * 2. One of the reverse map locks is held (i_mmap_mutex or anon_vma->rwsem).
+ * 2. One of the reverse map locks is held (i_mmap_rwsem or anon_vma->rwsem).
  * 3. No other concurrent thread can access the list (release)
  */
 struct mmu_notifier {
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 3879d7664dfc..2f0856d14b21 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -722,6 +722,9 @@ typedef struct pglist_data {
 	int nr_zones;
 #ifdef CONFIG_FLAT_NODE_MEM_MAP	/* means !SPARSEMEM */
 	struct page *node_mem_map;
+#ifdef CONFIG_PAGE_EXTENSION
+	struct page_ext *node_page_ext;
+#endif
 #endif
 #ifndef CONFIG_NO_BOOTMEM
 	struct bootmem_data *bdata;
@@ -1075,6 +1078,7 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn)
 #define SECTION_ALIGN_DOWN(pfn)	((pfn) & PAGE_SECTION_MASK)
 
 struct page;
+struct page_ext;
 struct mem_section {
 	/*
 	 * This is, logically, a pointer to an array of struct
@@ -1092,6 +1096,14 @@ struct mem_section {
 
 	/* See declaration of similar field in struct zone */
 	unsigned long *pageblock_flags;
+#ifdef CONFIG_PAGE_EXTENSION
+	/*
+	 * If !SPARSEMEM, pgdat doesn't have page_ext pointer. We use
+	 * section. (see page_ext.h about this.)
+	 */
+	struct page_ext *page_ext;
+	unsigned long pad;
+#endif
 	/*
 	 * WARNING: mem_section must be a power-of-2 in size for the
 	 * calculation and use of SECTION_ROOT_MASK to make sense.
diff --git a/include/linux/oom.h b/include/linux/oom.h
index e8d6e1058723..853698c721f7 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -92,6 +92,17 @@ static inline bool oom_gfp_allowed(gfp_t gfp_mask)
 
 extern struct task_struct *find_lock_task_mm(struct task_struct *p);
 
+static inline bool task_will_free_mem(struct task_struct *task)
+{
+	/*
+	 * A coredumping process may sleep for an extended period in exit_mm(),
+	 * so the oom killer cannot assume that the process will promptly exit
+	 * and release memory.
+	 */
+	return (task->flags & PF_EXITING) &&
+		!(task->signal->flags & SIGNAL_GROUP_COREDUMP);
+}
+
 /* sysctls */
 extern int sysctl_oom_dump_tasks;
 extern int sysctl_oom_kill_allocating_task;
diff --git a/include/linux/page-debug-flags.h b/include/linux/page-debug-flags.h
deleted file mode 100644
index 22691f614043..000000000000
--- a/include/linux/page-debug-flags.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#ifndef LINUX_PAGE_DEBUG_FLAGS_H
-#define  LINUX_PAGE_DEBUG_FLAGS_H
-
-/*
- * page->debug_flags bits:
- *
- * PAGE_DEBUG_FLAG_POISON is set for poisoned pages. This is used to
- * implement generic debug pagealloc feature. The pages are filled with
- * poison patterns and set this flag after free_pages(). The poisoned
- * pages are verified whether the patterns are not corrupted and clear
- * the flag before alloc_pages().
- */
-
-enum page_debug_flags {
-	PAGE_DEBUG_FLAG_POISON,		/* Page is poisoned */
-	PAGE_DEBUG_FLAG_GUARD,
-};
-
-/*
- * Ensure that CONFIG_WANT_PAGE_DEBUG_FLAGS reliably
- * gets turned off when no debug features are enabling it!
- */
-
-#ifdef CONFIG_WANT_PAGE_DEBUG_FLAGS
-#if !defined(CONFIG_PAGE_POISONING) && \
-    !defined(CONFIG_PAGE_GUARD) \
-/* && !defined(CONFIG_PAGE_DEBUG_SOMETHING_ELSE) && ... */
-#error WANT_PAGE_DEBUG_FLAGS is turned on with no debug features!
-#endif
-#endif /* CONFIG_WANT_PAGE_DEBUG_FLAGS */
-
-#endif /* LINUX_PAGE_DEBUG_FLAGS_H */
diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h
new file mode 100644
index 000000000000..d2a2c84c72d0
--- /dev/null
+++ b/include/linux/page_ext.h
@@ -0,0 +1,84 @@
+#ifndef __LINUX_PAGE_EXT_H
+#define __LINUX_PAGE_EXT_H
+
+#include <linux/types.h>
+#include <linux/stacktrace.h>
+
+struct pglist_data;
+struct page_ext_operations {
+	bool (*need)(void);
+	void (*init)(void);
+};
+
+#ifdef CONFIG_PAGE_EXTENSION
+
+/*
+ * page_ext->flags bits:
+ *
+ * PAGE_EXT_DEBUG_POISON is set for poisoned pages. This is used to
+ * implement generic debug pagealloc feature. The pages are filled with
+ * poison patterns and set this flag after free_pages(). The poisoned
+ * pages are verified whether the patterns are not corrupted and clear
+ * the flag before alloc_pages().
+ */
+
+enum page_ext_flags {
+	PAGE_EXT_DEBUG_POISON,		/* Page is poisoned */
+	PAGE_EXT_DEBUG_GUARD,
+	PAGE_EXT_OWNER,
+};
+
+/*
+ * Page Extension can be considered as an extended mem_map.
+ * A page_ext page is associated with every page descriptor. The
+ * page_ext helps us add more information about the page.
+ * All page_ext are allocated at boot or memory hotplug event,
+ * then the page_ext for pfn always exists.
+ */
+struct page_ext {
+	unsigned long flags;
+#ifdef CONFIG_PAGE_OWNER
+	unsigned int order;
+	gfp_t gfp_mask;
+	struct stack_trace trace;
+	unsigned long trace_entries[8];
+#endif
+};
+
+extern void pgdat_page_ext_init(struct pglist_data *pgdat);
+
+#ifdef CONFIG_SPARSEMEM
+static inline void page_ext_init_flatmem(void)
+{
+}
+extern void page_ext_init(void);
+#else
+extern void page_ext_init_flatmem(void);
+static inline void page_ext_init(void)
+{
+}
+#endif
+
+struct page_ext *lookup_page_ext(struct page *page);
+
+#else /* !CONFIG_PAGE_EXTENSION */
+struct page_ext;
+
+static inline void pgdat_page_ext_init(struct pglist_data *pgdat)
+{
+}
+
+static inline struct page_ext *lookup_page_ext(struct page *page)
+{
+	return NULL;
+}
+
+static inline void page_ext_init(void)
+{
+}
+
+static inline void page_ext_init_flatmem(void)
+{
+}
+#endif /* CONFIG_PAGE_EXTENSION */
+#endif /* __LINUX_PAGE_EXT_H */
diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h
new file mode 100644
index 000000000000..b48c3471c254
--- /dev/null
+++ b/include/linux/page_owner.h
@@ -0,0 +1,38 @@
+#ifndef __LINUX_PAGE_OWNER_H
+#define __LINUX_PAGE_OWNER_H
+
+#ifdef CONFIG_PAGE_OWNER
+extern bool page_owner_inited;
+extern struct page_ext_operations page_owner_ops;
+
+extern void __reset_page_owner(struct page *page, unsigned int order);
+extern void __set_page_owner(struct page *page,
+			unsigned int order, gfp_t gfp_mask);
+
+static inline void reset_page_owner(struct page *page, unsigned int order)
+{
+	if (likely(!page_owner_inited))
+		return;
+
+	__reset_page_owner(page, order);
+}
+
+static inline void set_page_owner(struct page *page,
+			unsigned int order, gfp_t gfp_mask)
+{
+	if (likely(!page_owner_inited))
+		return;
+
+	__set_page_owner(page, order, gfp_mask);
+}
+#else
+static inline void reset_page_owner(struct page *page, unsigned int order)
+{
+}
+static inline void set_page_owner(struct page *page,
+			unsigned int order, gfp_t gfp_mask)
+{
+}
+
+#endif /* CONFIG_PAGE_OWNER */
+#endif /* __LINUX_PAGE_OWNER_H */
diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index 420032d41d27..57f3a1c550dc 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -254,8 +254,6 @@ do {									\
 #endif	/* CONFIG_SMP */
 
 #define per_cpu(var, cpu)	(*per_cpu_ptr(&(var), cpu))
-#define __raw_get_cpu_var(var)	(*raw_cpu_ptr(&(var)))
-#define __get_cpu_var(var)	(*this_cpu_ptr(&(var)))
 
 /*
  * Must be an lvalue. Since @var must be a simple identifier,
diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h
index 0a260d8a18bf..18102529254e 100644
--- a/include/linux/ratelimit.h
+++ b/include/linux/ratelimit.h
@@ -17,14 +17,20 @@ struct ratelimit_state {
 	unsigned long	begin;
 };
 
-#define DEFINE_RATELIMIT_STATE(name, interval_init, burst_init)		\
-									\
-	struct ratelimit_state name = {					\
+#define RATELIMIT_STATE_INIT(name, interval_init, burst_init) {		\
 		.lock		= __RAW_SPIN_LOCK_UNLOCKED(name.lock),	\
 		.interval	= interval_init,			\
 		.burst		= burst_init,				\
 	}
 
+#define RATELIMIT_STATE_INIT_DISABLED					\
+	RATELIMIT_STATE_INIT(ratelimit_state, 0, DEFAULT_RATELIMIT_BURST)
+
+#define DEFINE_RATELIMIT_STATE(name, interval_init, burst_init)		\
+									\
+	struct ratelimit_state name =					\
+		RATELIMIT_STATE_INIT(name, interval_init, burst_init)	\
+
 static inline void ratelimit_state_init(struct ratelimit_state *rs,
 					int interval, int burst)
 {
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 55f5ee7cc3d3..8db31ef98d2f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1364,6 +1364,10 @@ struct task_struct {
 	unsigned sched_reset_on_fork:1;
 	unsigned sched_contributes_to_load:1;
 
+#ifdef CONFIG_MEMCG_KMEM
+	unsigned memcg_kmem_skip_account:1;
+#endif
+
 	unsigned long atomic_flags; /* Flags needing atomic access. */
 
 	pid_t pid;
@@ -1679,8 +1683,7 @@ struct task_struct {
 	/* bitmask and counter of trace recursion */
 	unsigned long trace_recursion;
 #endif /* CONFIG_TRACING */
-#ifdef CONFIG_MEMCG /* memcg uses this to do batch job */
-	unsigned int memcg_kmem_skip_account;
+#ifdef CONFIG_MEMCG
 	struct memcg_oom_info {
 		struct mem_cgroup *memcg;
 		gfp_t gfp_mask;
@@ -2482,6 +2485,10 @@ extern void do_group_exit(int);
 extern int do_execve(struct filename *,
 		     const char __user * const __user *,
 		     const char __user * const __user *);
+extern int do_execveat(int, struct filename *,
+		       const char __user * const __user *,
+		       const char __user * const __user *,
+		       int);
 extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *);
 struct task_struct *fork_idle(int);
 extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index 68c097077ef0..f4aee75f00b1 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -18,8 +18,6 @@ struct shrink_control {
 	 */
 	unsigned long nr_to_scan;
 
-	/* shrink from these nodes */
-	nodemask_t nodes_to_scan;
 	/* current node being shrunk (for NUMA aware shrinkers) */
 	int nid;
 };
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 8a2457d42fc8..9a139b637069 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -493,7 +493,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
  * @memcg: pointer to the memcg this cache belongs to
  * @list: list_head for the list of all caches in this memcg
  * @root_cache: pointer to the global, root cache, this cache was derived from
- * @nr_pages: number of pages that belongs to this cache.
  */
 struct memcg_cache_params {
 	bool is_root_cache;
@@ -506,7 +505,6 @@ struct memcg_cache_params {
 			struct mem_cgroup *memcg;
 			struct list_head list;
 			struct kmem_cache *root_cache;
-			atomic_t nr_pages;
 		};
 	};
 };
diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
index 115b570e3bff..669045ab73f3 100644
--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h
@@ -1,6 +1,8 @@
 #ifndef __LINUX_STACKTRACE_H
 #define __LINUX_STACKTRACE_H
 
+#include <linux/types.h>
+
 struct task_struct;
 struct pt_regs;
 
@@ -20,6 +22,8 @@ extern void save_stack_trace_tsk(struct task_struct *tsk,
 				struct stack_trace *trace);
 
 extern void print_stack_trace(struct stack_trace *trace, int spaces);
+extern int snprint_stack_trace(char *buf, size_t size,
+			struct stack_trace *trace, int spaces);
 
 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
 extern void save_stack_trace_user(struct stack_trace *trace);
@@ -32,6 +36,7 @@ extern void save_stack_trace_user(struct stack_trace *trace);
 # define save_stack_trace_tsk(tsk, trace)		do { } while (0)
 # define save_stack_trace_user(trace)			do { } while (0)
 # define print_stack_trace(trace, spaces)		do { } while (0)
+# define snprint_stack_trace(buf, size, trace, spaces)	do { } while (0)
 #endif
 
 #endif
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 37a585beef5c..34e8b60ab973 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -102,14 +102,6 @@ union swap_header {
 	} info;
 };
 
- /* A swap entry has to fit into a "unsigned long", as
-  * the entry is hidden in the "index" field of the
-  * swapper address space.
-  */
-typedef struct {
-	unsigned long val;
-} swp_entry_t;
-
 /*
  * current->reclaim_state points to one of these when a task is running
  * memory reclaim
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index c9afdc7a7f84..85893d744901 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -877,4 +877,9 @@ asmlinkage long sys_seccomp(unsigned int op, unsigned int flags,
 asmlinkage long sys_getrandom(char __user *buf, size_t count,
 			      unsigned int flags);
 asmlinkage long sys_bpf(int cmd, union bpf_attr *attr, unsigned int size);
+
+asmlinkage long sys_execveat(int dfd, const char __user *filename,
+			const char __user *const __user *argv,
+			const char __user *const __user *envp, int flags);
+
 #endif
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 730334cdf037..9246d32dc973 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -90,6 +90,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 #ifdef CONFIG_DEBUG_VM_VMACACHE
 		VMACACHE_FIND_CALLS,
 		VMACACHE_FIND_HITS,
+		VMACACHE_FULL_FLUSHES,
 #endif
 		NR_VM_EVENT_ITEMS
 };
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 22749c134117..e016bd9b1a04 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -707,9 +707,11 @@ __SYSCALL(__NR_getrandom, sys_getrandom)
 __SYSCALL(__NR_memfd_create, sys_memfd_create)
 #define __NR_bpf 280
 __SYSCALL(__NR_bpf, sys_bpf)
+#define __NR_execveat 281
+__SC_COMP(__NR_execveat, sys_execveat, compat_sys_execveat)
 
 #undef __NR_syscalls
-#define __NR_syscalls 281
+#define __NR_syscalls 282
 
 /*
  * All syscalls below here should go away really,
diff --git a/include/uapi/linux/msg.h b/include/uapi/linux/msg.h
index a70375526578..f51c8001dbe5 100644
--- a/include/uapi/linux/msg.h
+++ b/include/uapi/linux/msg.h
@@ -51,16 +51,28 @@ struct msginfo {
 };
 
 /*
- * Scaling factor to compute msgmni:
- * the memory dedicated to msg queues (msgmni * msgmnb) should occupy
- * at most 1/MSG_MEM_SCALE of the lowmem (see the formula in ipc/msg.c):
- * up to 8MB       : msgmni = 16 (MSGMNI)
- * 4 GB            : msgmni = 8K
- * more than 16 GB : msgmni = 32K (IPCMNI)
+ * MSGMNI, MSGMAX and MSGMNB are default values which can be
+ * modified by sysctl.
+ *
+ * MSGMNI is the upper limit for the number of messages queues per
+ * namespace.
+ * It has been chosen to be as large possible without facilitating
+ * scenarios where userspace causes overflows when adjusting the limits via
+ * operations of the form retrieve current limit; add X; update limit".
+ *
+ * MSGMNB is the default size of a new message queue. Non-root tasks can
+ * decrease the size with msgctl(IPC_SET), root tasks
+ * (actually: CAP_SYS_RESOURCE) can both increase and decrease the queue
+ * size. The optimal value is application dependent.
+ * 16384 is used because it was always used (since 0.99.10)
+ *
+ * MAXMAX is the maximum size of an individual message, it's a global
+ * (per-namespace) limit that applies for all message queues.
+ * It's set to 1/2 of MSGMNB, to ensure that at least two messages fit into
+ * the queue. This is also an arbitrary choice (since 2.6.0).
  */
-#define MSG_MEM_SCALE 32
 
-#define MSGMNI    16   /* <= IPCMNI */     /* max # of msg queue identifiers */
+#define MSGMNI 32000   /* <= IPCMNI */     /* max # of msg queue identifiers */
 #define MSGMAX  8192   /* <= INT_MAX */   /* max size of message (bytes) */
 #define MSGMNB 16384   /* <= INT_MAX */   /* default max size of a message queue */
 
diff --git a/include/uapi/linux/sem.h b/include/uapi/linux/sem.h
index 541fce03b50c..dd73b908b2f3 100644
--- a/include/uapi/linux/sem.h
+++ b/include/uapi/linux/sem.h
@@ -63,10 +63,22 @@ struct  seminfo {
 	int semaem;
 };
 
-#define SEMMNI  128             /* <= IPCMNI  max # of semaphore identifiers */
-#define SEMMSL  250             /* <= 8 000 max num of semaphores per id */
+/*
+ * SEMMNI, SEMMSL and SEMMNS are default values which can be
+ * modified by sysctl.
+ * The values has been chosen to be larger than necessary for any
+ * known configuration.
+ *
+ * SEMOPM should not be increased beyond 1000, otherwise there is the
+ * risk that semop()/semtimedop() fails due to kernel memory fragmentation when
+ * allocating the sop array.
+ */
+
+
+#define SEMMNI  32000           /* <= IPCMNI  max # of semaphore identifiers */
+#define SEMMSL  32000           /* <= INT_MAX max num of semaphores per id */
 #define SEMMNS  (SEMMNI*SEMMSL) /* <= INT_MAX max # of semaphores in system */
-#define SEMOPM  32	        /* <= 1 000 max num of ops per semop call */
+#define SEMOPM  500	        /* <= 1 000 max num of ops per semop call */
 #define SEMVMX  32767           /* <= 32767 semaphore maximum value */
 #define SEMAEM  SEMVMX          /* adjust on exit max value */
author	Linus Torvalds <torvalds@linux-foundation.org>	2014-12-14 00:00:36 +0300
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-12-14 00:00:36 +0300
commit	78a45c6f067824cf5d0a9fedea7339ac2e28603c (patch)
tree	b4f78c8b6b9059ddace0a18c11629b8d2045f793 /include
parent	f96fe225677b3efb74346ebd56fafe3997b02afa (diff)
parent	29d293b6007b91a4463f05bc8d0b26e0e65c5816 (diff)
download	linux-78a45c6f067824cf5d0a9fedea7339ac2e28603c.tar.xz