summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-12-14 00:00:36 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2014-12-14 00:00:36 +0300
commit78a45c6f067824cf5d0a9fedea7339ac2e28603c (patch)
treeb4f78c8b6b9059ddace0a18c11629b8d2045f793 /include
parentf96fe225677b3efb74346ebd56fafe3997b02afa (diff)
parent29d293b6007b91a4463f05bc8d0b26e0e65c5816 (diff)
downloadlinux-78a45c6f067824cf5d0a9fedea7339ac2e28603c.tar.xz
Merge branch 'akpm' (second patch-bomb from Andrew)
Merge second patchbomb from Andrew Morton: - the rest of MM - misc fs fixes - add execveat() syscall - new ratelimit feature for fault-injection - decompressor updates - ipc/ updates - fallocate feature creep - fsnotify cleanups - a few other misc things * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (99 commits) cgroups: Documentation: fix trivial typos and wrong paragraph numberings parisc: percpu: update comments referring to __get_cpu_var percpu: update local_ops.txt to reflect this_cpu operations percpu: remove __get_cpu_var and __raw_get_cpu_var macros fsnotify: remove destroy_list from fsnotify_mark fsnotify: unify inode and mount marks handling fallocate: create FAN_MODIFY and IN_MODIFY events mm/cma: make kmemleak ignore CMA regions slub: fix cpuset check in get_any_partial slab: fix cpuset check in fallback_alloc shmdt: use i_size_read() instead of ->i_size ipc/shm.c: fix overly aggressive shmdt() when calls span multiple segments ipc/msg: increase MSGMNI, remove scaling ipc/sem.c: increase SEMMSL, SEMMNI, SEMOPM ipc/sem.c: change memory barrier in sem_lock() to smp_rmb() lib/decompress.c: consistency of compress formats for kernel image decompress_bunzip2: off by one in get_next_block() usr/Kconfig: make initrd compression algorithm selection not expert fault-inject: add ratelimit option ratelimit: add initialization macro ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/binfmts.h4
-rw-r--r--include/linux/bitmap.h36
-rw-r--r--include/linux/compat.h3
-rw-r--r--include/linux/fault-inject.h17
-rw-r--r--include/linux/fs.h24
-rw-r--r--include/linux/fsnotify_backend.h31
-rw-r--r--include/linux/gfp.h7
-rw-r--r--include/linux/ipc_namespace.h20
-rw-r--r--include/linux/kmemleak.h2
-rw-r--r--include/linux/memcontrol.h16
-rw-r--r--include/linux/mm.h42
-rw-r--r--include/linux/mm_types.h12
-rw-r--r--include/linux/mmu_notifier.h2
-rw-r--r--include/linux/mmzone.h12
-rw-r--r--include/linux/oom.h11
-rw-r--r--include/linux/page-debug-flags.h32
-rw-r--r--include/linux/page_ext.h84
-rw-r--r--include/linux/page_owner.h38
-rw-r--r--include/linux/percpu-defs.h2
-rw-r--r--include/linux/ratelimit.h12
-rw-r--r--include/linux/sched.h11
-rw-r--r--include/linux/shrinker.h2
-rw-r--r--include/linux/slab.h2
-rw-r--r--include/linux/stacktrace.h5
-rw-r--r--include/linux/swap.h8
-rw-r--r--include/linux/syscalls.h5
-rw-r--r--include/linux/vm_event_item.h1
-rw-r--r--include/uapi/asm-generic/unistd.h4
-rw-r--r--include/uapi/linux/msg.h28
-rw-r--r--include/uapi/linux/sem.h18
30 files changed, 356 insertions, 135 deletions
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 61f29e5ea840..576e4639ca60 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -53,6 +53,10 @@ struct linux_binprm {
#define BINPRM_FLAGS_EXECFD_BIT 1
#define BINPRM_FLAGS_EXECFD (1 << BINPRM_FLAGS_EXECFD_BIT)
+/* filename of the binary will be inaccessible after exec */
+#define BINPRM_FLAGS_PATH_INACCESSIBLE_BIT 2
+#define BINPRM_FLAGS_PATH_INACCESSIBLE (1 << BINPRM_FLAGS_PATH_INACCESSIBLE_BIT)
+
/* Function parameter for binfmt->coredump */
struct coredump_params {
const siginfo_t *siginfo;
diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index e1c8d080c427..34e020c23644 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -45,6 +45,7 @@
* bitmap_set(dst, pos, nbits) Set specified bit area
* bitmap_clear(dst, pos, nbits) Clear specified bit area
* bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area
+ * bitmap_find_next_zero_area_off(buf, len, pos, n, mask, mask_off) as above
* bitmap_shift_right(dst, src, n, nbits) *dst = *src >> n
* bitmap_shift_left(dst, src, n, nbits) *dst = *src << n
* bitmap_remap(dst, src, old, new, nbits) *dst = map(old, new)(src)
@@ -114,11 +115,36 @@ extern int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits);
extern void bitmap_set(unsigned long *map, unsigned int start, int len);
extern void bitmap_clear(unsigned long *map, unsigned int start, int len);
-extern unsigned long bitmap_find_next_zero_area(unsigned long *map,
- unsigned long size,
- unsigned long start,
- unsigned int nr,
- unsigned long align_mask);
+
+extern unsigned long bitmap_find_next_zero_area_off(unsigned long *map,
+ unsigned long size,
+ unsigned long start,
+ unsigned int nr,
+ unsigned long align_mask,
+ unsigned long align_offset);
+
+/**
+ * bitmap_find_next_zero_area - find a contiguous aligned zero area
+ * @map: The address to base the search on
+ * @size: The bitmap size in bits
+ * @start: The bitnumber to start searching at
+ * @nr: The number of zeroed bits we're looking for
+ * @align_mask: Alignment mask for zero area
+ *
+ * The @align_mask should be one less than a power of 2; the effect is that
+ * the bit offsets of all zero areas this function finds are multiples of that
+ * power of 2. An @align_mask of 0 means no alignment is required.
+ */
+static inline unsigned long
+bitmap_find_next_zero_area(unsigned long *map,
+ unsigned long size,
+ unsigned long start,
+ unsigned int nr,
+ unsigned long align_mask)
+{
+ return bitmap_find_next_zero_area_off(map, size, start, nr,
+ align_mask, 0);
+}
extern int bitmap_scnprintf(char *buf, unsigned int len,
const unsigned long *src, int nbits);
diff --git a/include/linux/compat.h b/include/linux/compat.h
index e6494261eaff..7450ca2ac1fc 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -357,6 +357,9 @@ asmlinkage long compat_sys_lseek(unsigned int, compat_off_t, unsigned int);
asmlinkage long compat_sys_execve(const char __user *filename, const compat_uptr_t __user *argv,
const compat_uptr_t __user *envp);
+asmlinkage long compat_sys_execveat(int dfd, const char __user *filename,
+ const compat_uptr_t __user *argv,
+ const compat_uptr_t __user *envp, int flags);
asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
compat_ulong_t __user *outp, compat_ulong_t __user *exp,
diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
index c6f996f2abb6..798fad9e420d 100644
--- a/include/linux/fault-inject.h
+++ b/include/linux/fault-inject.h
@@ -5,6 +5,7 @@
#include <linux/types.h>
#include <linux/debugfs.h>
+#include <linux/ratelimit.h>
#include <linux/atomic.h>
/*
@@ -25,14 +26,18 @@ struct fault_attr {
unsigned long reject_end;
unsigned long count;
+ struct ratelimit_state ratelimit_state;
+ struct dentry *dname;
};
-#define FAULT_ATTR_INITIALIZER { \
- .interval = 1, \
- .times = ATOMIC_INIT(1), \
- .require_end = ULONG_MAX, \
- .stacktrace_depth = 32, \
- .verbose = 2, \
+#define FAULT_ATTR_INITIALIZER { \
+ .interval = 1, \
+ .times = ATOMIC_INIT(1), \
+ .require_end = ULONG_MAX, \
+ .stacktrace_depth = 32, \
+ .ratelimit_state = RATELIMIT_STATE_INIT_DISABLED, \
+ .verbose = 2, \
+ .dname = NULL, \
}
#define DECLARE_FAULT_ATTR(name) struct fault_attr name = FAULT_ATTR_INITIALIZER
diff --git a/include/linux/fs.h b/include/linux/fs.h
index bb29b02d9bb6..4193a0bd99b0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -18,6 +18,7 @@
#include <linux/pid.h>
#include <linux/bug.h>
#include <linux/mutex.h>
+#include <linux/rwsem.h>
#include <linux/capability.h>
#include <linux/semaphore.h>
#include <linux/fiemap.h>
@@ -401,7 +402,7 @@ struct address_space {
atomic_t i_mmap_writable;/* count VM_SHARED mappings */
struct rb_root i_mmap; /* tree of private and shared mappings */
struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
- struct mutex i_mmap_mutex; /* protect tree, count, list */
+ struct rw_semaphore i_mmap_rwsem; /* protect tree, count, list */
/* Protected by tree_lock together with the radix tree */
unsigned long nrpages; /* number of total pages */
unsigned long nrshadows; /* number of shadow entries */
@@ -467,6 +468,26 @@ struct block_device {
int mapping_tagged(struct address_space *mapping, int tag);
+static inline void i_mmap_lock_write(struct address_space *mapping)
+{
+ down_write(&mapping->i_mmap_rwsem);
+}
+
+static inline void i_mmap_unlock_write(struct address_space *mapping)
+{
+ up_write(&mapping->i_mmap_rwsem);
+}
+
+static inline void i_mmap_lock_read(struct address_space *mapping)
+{
+ down_read(&mapping->i_mmap_rwsem);
+}
+
+static inline void i_mmap_unlock_read(struct address_space *mapping)
+{
+ up_read(&mapping->i_mmap_rwsem);
+}
+
/*
* Might pages of this file be mapped into userspace?
*/
@@ -2075,6 +2096,7 @@ extern int vfs_open(const struct path *, struct file *, const struct cred *);
extern struct file * dentry_open(const struct path *, int, const struct cred *);
extern int filp_close(struct file *, fl_owner_t id);
+extern struct filename *getname_flags(const char __user *, int, int *);
extern struct filename *getname(const char __user *);
extern struct filename *getname_kernel(const char *);
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index ca060d7c4fa6..0f313f93c586 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -197,24 +197,6 @@ struct fsnotify_group {
#define FSNOTIFY_EVENT_INODE 2
/*
- * Inode specific fields in an fsnotify_mark
- */
-struct fsnotify_inode_mark {
- struct inode *inode; /* inode this mark is associated with */
- struct hlist_node i_list; /* list of marks by inode->i_fsnotify_marks */
- struct list_head free_i_list; /* tmp list used when freeing this mark */
-};
-
-/*
- * Mount point specific fields in an fsnotify_mark
- */
-struct fsnotify_vfsmount_mark {
- struct vfsmount *mnt; /* vfsmount this mark is associated with */
- struct hlist_node m_list; /* list of marks by inode->i_fsnotify_marks */
- struct list_head free_m_list; /* tmp list used when freeing this mark */
-};
-
-/*
* a mark is simply an object attached to an in core inode which allows an
* fsnotify listener to indicate they are either no longer interested in events
* of a type matching mask or only interested in those events.
@@ -230,11 +212,17 @@ struct fsnotify_mark {
* in kernel that found and may be using this mark. */
atomic_t refcnt; /* active things looking at this mark */
struct fsnotify_group *group; /* group this mark is for */
- struct list_head g_list; /* list of marks by group->i_fsnotify_marks */
+ struct list_head g_list; /* list of marks by group->i_fsnotify_marks
+ * Also reused for queueing mark into
+ * destroy_list when it's waiting for
+ * the end of SRCU period before it can
+ * be freed */
spinlock_t lock; /* protect group and inode */
+ struct hlist_node obj_list; /* list of marks for inode / vfsmount */
+ struct list_head free_list; /* tmp list used when freeing this mark */
union {
- struct fsnotify_inode_mark i;
- struct fsnotify_vfsmount_mark m;
+ struct inode *inode; /* inode this mark is associated with */
+ struct vfsmount *mnt; /* vfsmount this mark is associated with */
};
__u32 ignored_mask; /* events types to ignore */
#define FSNOTIFY_MARK_FLAG_INODE 0x01
@@ -243,7 +231,6 @@ struct fsnotify_mark {
#define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x08
#define FSNOTIFY_MARK_FLAG_ALIVE 0x10
unsigned int flags; /* vfsmount or inode mark? */
- struct list_head destroy_list;
void (*free_mark)(struct fsnotify_mark *mark); /* called on final put+free */
};
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 07d2699cdb51..b840e3b2770d 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -110,11 +110,8 @@ struct vm_area_struct;
#define GFP_TEMPORARY (__GFP_WAIT | __GFP_IO | __GFP_FS | \
__GFP_RECLAIMABLE)
#define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
-#define GFP_HIGHUSER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \
- __GFP_HIGHMEM)
-#define GFP_HIGHUSER_MOVABLE (__GFP_WAIT | __GFP_IO | __GFP_FS | \
- __GFP_HARDWALL | __GFP_HIGHMEM | \
- __GFP_MOVABLE)
+#define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM)
+#define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE)
#define GFP_IOFS (__GFP_IO | __GFP_FS)
#define GFP_TRANSHUGE (GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 35e7eca4e33b..e365d5ec69cb 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -7,15 +7,6 @@
#include <linux/notifier.h>
#include <linux/nsproxy.h>
-/*
- * ipc namespace events
- */
-#define IPCNS_MEMCHANGED 0x00000001 /* Notify lowmem size changed */
-#define IPCNS_CREATED 0x00000002 /* Notify new ipc namespace created */
-#define IPCNS_REMOVED 0x00000003 /* Notify ipc namespace removed */
-
-#define IPCNS_CALLBACK_PRI 0
-
struct user_namespace;
struct ipc_ids {
@@ -38,7 +29,6 @@ struct ipc_namespace {
unsigned int msg_ctlmni;
atomic_t msg_bytes;
atomic_t msg_hdrs;
- int auto_msgmni;
size_t shm_ctlmax;
size_t shm_ctlall;
@@ -77,18 +67,8 @@ extern atomic_t nr_ipc_ns;
extern spinlock_t mq_lock;
#ifdef CONFIG_SYSVIPC
-extern int register_ipcns_notifier(struct ipc_namespace *);
-extern int cond_register_ipcns_notifier(struct ipc_namespace *);
-extern void unregister_ipcns_notifier(struct ipc_namespace *);
-extern int ipcns_notify(unsigned long);
extern void shm_destroy_orphaned(struct ipc_namespace *ns);
#else /* CONFIG_SYSVIPC */
-static inline int register_ipcns_notifier(struct ipc_namespace *ns)
-{ return 0; }
-static inline int cond_register_ipcns_notifier(struct ipc_namespace *ns)
-{ return 0; }
-static inline void unregister_ipcns_notifier(struct ipc_namespace *ns) { }
-static inline int ipcns_notify(unsigned long l) { return 0; }
static inline void shm_destroy_orphaned(struct ipc_namespace *ns) {}
#endif /* CONFIG_SYSVIPC */
diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h
index 057e95971014..e705467ddb47 100644
--- a/include/linux/kmemleak.h
+++ b/include/linux/kmemleak.h
@@ -21,6 +21,8 @@
#ifndef __KMEMLEAK_H
#define __KMEMLEAK_H
+#include <linux/slab.h>
+
#ifdef CONFIG_DEBUG_KMEMLEAK
extern void kmemleak_init(void) __ref;
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 6ea9f919e888..7c95af8d552c 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -400,8 +400,8 @@ int memcg_cache_id(struct mem_cgroup *memcg);
void memcg_update_array_size(int num_groups);
-struct kmem_cache *
-__memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);
+struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep);
+void __memcg_kmem_put_cache(struct kmem_cache *cachep);
int __memcg_charge_slab(struct kmem_cache *cachep, gfp_t gfp, int order);
void __memcg_uncharge_slab(struct kmem_cache *cachep, int order);
@@ -492,7 +492,13 @@ memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
if (unlikely(fatal_signal_pending(current)))
return cachep;
- return __memcg_kmem_get_cache(cachep, gfp);
+ return __memcg_kmem_get_cache(cachep);
+}
+
+static __always_inline void memcg_kmem_put_cache(struct kmem_cache *cachep)
+{
+ if (memcg_kmem_enabled())
+ __memcg_kmem_put_cache(cachep);
}
#else
#define for_each_memcg_cache_index(_idx) \
@@ -528,6 +534,10 @@ memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
{
return cachep;
}
+
+static inline void memcg_kmem_put_cache(struct kmem_cache *cachep)
+{
+}
#endif /* CONFIG_MEMCG_KMEM */
#endif /* _LINUX_MEMCONTROL_H */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3b337efbe533..c0a67b894c4c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -19,6 +19,7 @@
#include <linux/bit_spinlock.h>
#include <linux/shrinker.h>
#include <linux/resource.h>
+#include <linux/page_ext.h>
struct mempolicy;
struct anon_vma;
@@ -2060,7 +2061,22 @@ static inline void vm_stat_account(struct mm_struct *mm,
#endif /* CONFIG_PROC_FS */
#ifdef CONFIG_DEBUG_PAGEALLOC
-extern void kernel_map_pages(struct page *page, int numpages, int enable);
+extern bool _debug_pagealloc_enabled;
+extern void __kernel_map_pages(struct page *page, int numpages, int enable);
+
+static inline bool debug_pagealloc_enabled(void)
+{
+ return _debug_pagealloc_enabled;
+}
+
+static inline void
+kernel_map_pages(struct page *page, int numpages, int enable)
+{
+ if (!debug_pagealloc_enabled())
+ return;
+
+ __kernel_map_pages(page, numpages, enable);
+}
#ifdef CONFIG_HIBERNATION
extern bool kernel_page_present(struct page *page);
#endif /* CONFIG_HIBERNATION */
@@ -2094,9 +2110,9 @@ int drop_caches_sysctl_handler(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
#endif
-unsigned long shrink_slab(struct shrink_control *shrink,
- unsigned long nr_pages_scanned,
- unsigned long lru_pages);
+unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid,
+ unsigned long nr_scanned,
+ unsigned long nr_eligible);
#ifndef CONFIG_MMU
#define randomize_va_space 0
@@ -2155,20 +2171,36 @@ extern void copy_user_huge_page(struct page *dst, struct page *src,
unsigned int pages_per_huge_page);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
+extern struct page_ext_operations debug_guardpage_ops;
+extern struct page_ext_operations page_poisoning_ops;
+
#ifdef CONFIG_DEBUG_PAGEALLOC
extern unsigned int _debug_guardpage_minorder;
+extern bool _debug_guardpage_enabled;
static inline unsigned int debug_guardpage_minorder(void)
{
return _debug_guardpage_minorder;
}
+static inline bool debug_guardpage_enabled(void)
+{
+ return _debug_guardpage_enabled;
+}
+
static inline bool page_is_guard(struct page *page)
{
- return test_bit(PAGE_DEBUG_FLAG_GUARD, &page->debug_flags);
+ struct page_ext *page_ext;
+
+ if (!debug_guardpage_enabled())
+ return false;
+
+ page_ext = lookup_page_ext(page);
+ return test_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
}
#else
static inline unsigned int debug_guardpage_minorder(void) { return 0; }
+static inline bool debug_guardpage_enabled(void) { return false; }
static inline bool page_is_guard(struct page *page) { return false; }
#endif /* CONFIG_DEBUG_PAGEALLOC */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index bf9f57529dcf..6d34aa266a8c 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -10,7 +10,6 @@
#include <linux/rwsem.h>
#include <linux/completion.h>
#include <linux/cpumask.h>
-#include <linux/page-debug-flags.h>
#include <linux/uprobes.h>
#include <linux/page-flags-layout.h>
#include <asm/page.h>
@@ -186,9 +185,6 @@ struct page {
void *virtual; /* Kernel virtual address (NULL if
not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */
-#ifdef CONFIG_WANT_PAGE_DEBUG_FLAGS
- unsigned long debug_flags; /* Use atomic bitops on this */
-#endif
#ifdef CONFIG_KMEMCHECK
/*
@@ -534,4 +530,12 @@ enum tlb_flush_reason {
NR_TLB_FLUSH_REASONS,
};
+ /*
+ * A swap entry has to fit into a "unsigned long", as the entry is hidden
+ * in the "index" field of the swapper address space.
+ */
+typedef struct {
+ unsigned long val;
+} swp_entry_t;
+
#endif /* _LINUX_MM_TYPES_H */
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 88787bb4b3b9..ab8564b03468 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -154,7 +154,7 @@ struct mmu_notifier_ops {
* Therefore notifier chains can only be traversed when either
*
* 1. mmap_sem is held.
- * 2. One of the reverse map locks is held (i_mmap_mutex or anon_vma->rwsem).
+ * 2. One of the reverse map locks is held (i_mmap_rwsem or anon_vma->rwsem).
* 3. No other concurrent thread can access the list (release)
*/
struct mmu_notifier {
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 3879d7664dfc..2f0856d14b21 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -722,6 +722,9 @@ typedef struct pglist_data {
int nr_zones;
#ifdef CONFIG_FLAT_NODE_MEM_MAP /* means !SPARSEMEM */
struct page *node_mem_map;
+#ifdef CONFIG_PAGE_EXTENSION
+ struct page_ext *node_page_ext;
+#endif
#endif
#ifndef CONFIG_NO_BOOTMEM
struct bootmem_data *bdata;
@@ -1075,6 +1078,7 @@ static inline unsigned long early_pfn_to_nid(unsigned long pfn)
#define SECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SECTION_MASK)
struct page;
+struct page_ext;
struct mem_section {
/*
* This is, logically, a pointer to an array of struct
@@ -1092,6 +1096,14 @@ struct mem_section {
/* See declaration of similar field in struct zone */
unsigned long *pageblock_flags;
+#ifdef CONFIG_PAGE_EXTENSION
+ /*
+ * If SPARSEMEM, pgdat doesn't have page_ext pointer. We use
+ * section. (see page_ext.h about this.)
+ */
+ struct page_ext *page_ext;
+ unsigned long pad;
+#endif
/*
* WARNING: mem_section must be a power-of-2 in size for the
* calculation and use of SECTION_ROOT_MASK to make sense.
diff --git a/include/linux/oom.h b/include/linux/oom.h
index e8d6e1058723..853698c721f7 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -92,6 +92,17 @@ static inline bool oom_gfp_allowed(gfp_t gfp_mask)
extern struct task_struct *find_lock_task_mm(struct task_struct *p);
+static inline bool task_will_free_mem(struct task_struct *task)
+{
+ /*
+ * A coredumping process may sleep for an extended period in exit_mm(),
+ * so the oom killer cannot assume that the process will promptly exit
+ * and release memory.
+ */
+ return (task->flags & PF_EXITING) &&
+ !(task->signal->flags & SIGNAL_GROUP_COREDUMP);
+}
+
/* sysctls */
extern int sysctl_oom_dump_tasks;
extern int sysctl_oom_kill_allocating_task;
diff --git a/include/linux/page-debug-flags.h b/include/linux/page-debug-flags.h
deleted file mode 100644
index 22691f614043..000000000000
--- a/include/linux/page-debug-flags.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#ifndef LINUX_PAGE_DEBUG_FLAGS_H
-#define LINUX_PAGE_DEBUG_FLAGS_H
-
-/*
- * page->debug_flags bits:
- *
- * PAGE_DEBUG_FLAG_POISON is set for poisoned pages. This is used to
- * implement generic debug pagealloc feature. The pages are filled with
- * poison patterns and set this flag after free_pages(). The poisoned
- * pages are verified whether the patterns are not corrupted and clear
- * the flag before alloc_pages().
- */
-
-enum page_debug_flags {
- PAGE_DEBUG_FLAG_POISON, /* Page is poisoned */
- PAGE_DEBUG_FLAG_GUARD,
-};
-
-/*
- * Ensure that CONFIG_WANT_PAGE_DEBUG_FLAGS reliably
- * gets turned off when no debug features are enabling it!
- */
-
-#ifdef CONFIG_WANT_PAGE_DEBUG_FLAGS
-#if !defined(CONFIG_PAGE_POISONING) && \
- !defined(CONFIG_PAGE_GUARD) \
-/* && !defined(CONFIG_PAGE_DEBUG_SOMETHING_ELSE) && ... */
-#error WANT_PAGE_DEBUG_FLAGS is turned on with no debug features!
-#endif
-#endif /* CONFIG_WANT_PAGE_DEBUG_FLAGS */
-
-#endif /* LINUX_PAGE_DEBUG_FLAGS_H */
diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h
new file mode 100644
index 000000000000..d2a2c84c72d0
--- /dev/null
+++ b/include/linux/page_ext.h
@@ -0,0 +1,84 @@
+#ifndef __LINUX_PAGE_EXT_H
+#define __LINUX_PAGE_EXT_H
+
+#include <linux/types.h>
+#include <linux/stacktrace.h>
+
+struct pglist_data;
+struct page_ext_operations {
+ bool (*need)(void);
+ void (*init)(void);
+};
+
+#ifdef CONFIG_PAGE_EXTENSION
+
+/*
+ * page_ext->flags bits:
+ *
+ * PAGE_EXT_DEBUG_POISON is set for poisoned pages. This is used to
+ * implement generic debug pagealloc feature. The pages are filled with
+ * poison patterns and set this flag after free_pages(). The poisoned
+ * pages are verified whether the patterns are not corrupted and clear
+ * the flag before alloc_pages().
+ */
+
+enum page_ext_flags {
+ PAGE_EXT_DEBUG_POISON, /* Page is poisoned */
+ PAGE_EXT_DEBUG_GUARD,
+ PAGE_EXT_OWNER,
+};
+
+/*
+ * Page Extension can be considered as an extended mem_map.
+ * A page_ext page is associated with every page descriptor. The
+ * page_ext helps us add more information about the page.
+ * All page_ext are allocated at boot or memory hotplug event,
+ * then the page_ext for pfn always exists.
+ */
+struct page_ext {
+ unsigned long flags;
+#ifdef CONFIG_PAGE_OWNER
+ unsigned int order;
+ gfp_t gfp_mask;
+ struct stack_trace trace;
+ unsigned long trace_entries[8];
+#endif
+};
+
+extern void pgdat_page_ext_init(struct pglist_data *pgdat);
+
+#ifdef CONFIG_SPARSEMEM
+static inline void page_ext_init_flatmem(void)
+{
+}
+extern void page_ext_init(void);
+#else
+extern void page_ext_init_flatmem(void);
+static inline void page_ext_init(void)
+{
+}
+#endif
+
+struct page_ext *lookup_page_ext(struct page *page);
+
+#else /* !CONFIG_PAGE_EXTENSION */
+struct page_ext;
+
+static inline void pgdat_page_ext_init(struct pglist_data *pgdat)
+{
+}
+
+static inline struct page_ext *lookup_page_ext(struct page *page)
+{
+ return NULL;
+}
+
+static inline void page_ext_init(void)
+{
+}
+
+static inline void page_ext_init_flatmem(void)
+{
+}
+#endif /* CONFIG_PAGE_EXTENSION */
+#endif /* __LINUX_PAGE_EXT_H */
diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h
new file mode 100644
index 000000000000..b48c3471c254
--- /dev/null
+++ b/include/linux/page_owner.h
@@ -0,0 +1,38 @@
+#ifndef __LINUX_PAGE_OWNER_H
+#define __LINUX_PAGE_OWNER_H
+
+#ifdef CONFIG_PAGE_OWNER
+extern bool page_owner_inited;
+extern struct page_ext_operations page_owner_ops;
+
+extern void __reset_page_owner(struct page *page, unsigned int order);
+extern void __set_page_owner(struct page *page,
+ unsigned int order, gfp_t gfp_mask);
+
+static inline void reset_page_owner(struct page *page, unsigned int order)
+{
+ if (likely(!page_owner_inited))
+ return;
+
+ __reset_page_owner(page, order);
+}
+
+static inline void set_page_owner(struct page *page,
+ unsigned int order, gfp_t gfp_mask)
+{
+ if (likely(!page_owner_inited))
+ return;
+
+ __set_page_owner(page, order, gfp_mask);
+}
+#else
+static inline void reset_page_owner(struct page *page, unsigned int order)
+{
+}
+static inline void set_page_owner(struct page *page,
+ unsigned int order, gfp_t gfp_mask)
+{
+}
+
+#endif /* CONFIG_PAGE_OWNER */
+#endif /* __LINUX_PAGE_OWNER_H */
diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index 420032d41d27..57f3a1c550dc 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -254,8 +254,6 @@ do { \
#endif /* CONFIG_SMP */
#define per_cpu(var, cpu) (*per_cpu_ptr(&(var), cpu))
-#define __raw_get_cpu_var(var) (*raw_cpu_ptr(&(var)))
-#define __get_cpu_var(var) (*this_cpu_ptr(&(var)))
/*
* Must be an lvalue. Since @var must be a simple identifier,
diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h
index 0a260d8a18bf..18102529254e 100644
--- a/include/linux/ratelimit.h
+++ b/include/linux/ratelimit.h
@@ -17,14 +17,20 @@ struct ratelimit_state {
unsigned long begin;
};
-#define DEFINE_RATELIMIT_STATE(name, interval_init, burst_init) \
- \
- struct ratelimit_state name = { \
+#define RATELIMIT_STATE_INIT(name, interval_init, burst_init) { \
.lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \
.interval = interval_init, \
.burst = burst_init, \
}
+#define RATELIMIT_STATE_INIT_DISABLED \
+ RATELIMIT_STATE_INIT(ratelimit_state, 0, DEFAULT_RATELIMIT_BURST)
+
+#define DEFINE_RATELIMIT_STATE(name, interval_init, burst_init) \
+ \
+ struct ratelimit_state name = \
+ RATELIMIT_STATE_INIT(name, interval_init, burst_init) \
+
static inline void ratelimit_state_init(struct ratelimit_state *rs,
int interval, int burst)
{
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 55f5ee7cc3d3..8db31ef98d2f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1364,6 +1364,10 @@ struct task_struct {
unsigned sched_reset_on_fork:1;
unsigned sched_contributes_to_load:1;
+#ifdef CONFIG_MEMCG_KMEM
+ unsigned memcg_kmem_skip_account:1;
+#endif
+
unsigned long atomic_flags; /* Flags needing atomic access. */
pid_t pid;
@@ -1679,8 +1683,7 @@ struct task_struct {
/* bitmask and counter of trace recursion */
unsigned long trace_recursion;
#endif /* CONFIG_TRACING */
-#ifdef CONFIG_MEMCG /* memcg uses this to do batch job */
- unsigned int memcg_kmem_skip_account;
+#ifdef CONFIG_MEMCG
struct memcg_oom_info {
struct mem_cgroup *memcg;
gfp_t gfp_mask;
@@ -2482,6 +2485,10 @@ extern void do_group_exit(int);
extern int do_execve(struct filename *,
const char __user * const __user *,
const char __user * const __user *);
+extern int do_execveat(int, struct filename *,
+ const char __user * const __user *,
+ const char __user * const __user *,
+ int);
extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *);
struct task_struct *fork_idle(int);
extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index 68c097077ef0..f4aee75f00b1 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -18,8 +18,6 @@ struct shrink_control {
*/
unsigned long nr_to_scan;
- /* shrink from these nodes */
- nodemask_t nodes_to_scan;
/* current node being shrunk (for NUMA aware shrinkers) */
int nid;
};
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 8a2457d42fc8..9a139b637069 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -493,7 +493,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
* @memcg: pointer to the memcg this cache belongs to
* @list: list_head for the list of all caches in this memcg
* @root_cache: pointer to the global, root cache, this cache was derived from
- * @nr_pages: number of pages that belongs to this cache.
*/
struct memcg_cache_params {
bool is_root_cache;
@@ -506,7 +505,6 @@ struct memcg_cache_params {
struct mem_cgroup *memcg;
struct list_head list;
struct kmem_cache *root_cache;
- atomic_t nr_pages;
};
};
};
diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
index 115b570e3bff..669045ab73f3 100644
--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h
@@ -1,6 +1,8 @@
#ifndef __LINUX_STACKTRACE_H
#define __LINUX_STACKTRACE_H
+#include <linux/types.h>
+
struct task_struct;
struct pt_regs;
@@ -20,6 +22,8 @@ extern void save_stack_trace_tsk(struct task_struct *tsk,
struct stack_trace *trace);
extern void print_stack_trace(struct stack_trace *trace, int spaces);
+extern int snprint_stack_trace(char *buf, size_t size,
+ struct stack_trace *trace, int spaces);
#ifdef CONFIG_USER_STACKTRACE_SUPPORT
extern void save_stack_trace_user(struct stack_trace *trace);
@@ -32,6 +36,7 @@ extern void save_stack_trace_user(struct stack_trace *trace);
# define save_stack_trace_tsk(tsk, trace) do { } while (0)
# define save_stack_trace_user(trace) do { } while (0)
# define print_stack_trace(trace, spaces) do { } while (0)
+# define snprint_stack_trace(buf, size, trace, spaces) do { } while (0)
#endif
#endif
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 37a585beef5c..34e8b60ab973 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -102,14 +102,6 @@ union swap_header {
} info;
};
- /* A swap entry has to fit into a "unsigned long", as
- * the entry is hidden in the "index" field of the
- * swapper address space.
- */
-typedef struct {
- unsigned long val;
-} swp_entry_t;
-
/*
* current->reclaim_state points to one of these when a task is running
* memory reclaim
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index c9afdc7a7f84..85893d744901 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -877,4 +877,9 @@ asmlinkage long sys_seccomp(unsigned int op, unsigned int flags,
asmlinkage long sys_getrandom(char __user *buf, size_t count,
unsigned int flags);
asmlinkage long sys_bpf(int cmd, union bpf_attr *attr, unsigned int size);
+
+asmlinkage long sys_execveat(int dfd, const char __user *filename,
+ const char __user *const __user *argv,
+ const char __user *const __user *envp, int flags);
+
#endif
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 730334cdf037..9246d32dc973 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -90,6 +90,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
#ifdef CONFIG_DEBUG_VM_VMACACHE
VMACACHE_FIND_CALLS,
VMACACHE_FIND_HITS,
+ VMACACHE_FULL_FLUSHES,
#endif
NR_VM_EVENT_ITEMS
};
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 22749c134117..e016bd9b1a04 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -707,9 +707,11 @@ __SYSCALL(__NR_getrandom, sys_getrandom)
__SYSCALL(__NR_memfd_create, sys_memfd_create)
#define __NR_bpf 280
__SYSCALL(__NR_bpf, sys_bpf)
+#define __NR_execveat 281
+__SC_COMP(__NR_execveat, sys_execveat, compat_sys_execveat)
#undef __NR_syscalls
-#define __NR_syscalls 281
+#define __NR_syscalls 282
/*
* All syscalls below here should go away really,
diff --git a/include/uapi/linux/msg.h b/include/uapi/linux/msg.h
index a70375526578..f51c8001dbe5 100644
--- a/include/uapi/linux/msg.h
+++ b/include/uapi/linux/msg.h
@@ -51,16 +51,28 @@ struct msginfo {
};
/*
- * Scaling factor to compute msgmni:
- * the memory dedicated to msg queues (msgmni * msgmnb) should occupy
- * at most 1/MSG_MEM_SCALE of the lowmem (see the formula in ipc/msg.c):
- * up to 8MB : msgmni = 16 (MSGMNI)
- * 4 GB : msgmni = 8K
- * more than 16 GB : msgmni = 32K (IPCMNI)
+ * MSGMNI, MSGMAX and MSGMNB are default values which can be
+ * modified by sysctl.
+ *
+ * MSGMNI is the upper limit for the number of message queues per
+ * namespace.
+ * It has been chosen to be as large as possible without facilitating
+ * scenarios where userspace causes overflows when adjusting the limits via
+ * operations of the form "retrieve current limit; add X; update limit".
+ *
+ * MSGMNB is the default size of a new message queue. Non-root tasks can
+ * decrease the size with msgctl(IPC_SET), root tasks
+ * (actually: CAP_SYS_RESOURCE) can both increase and decrease the queue
+ * size. The optimal value is application dependent.
+ * 16384 is used because it was always used (since 0.99.10)
+ *
+ * MSGMAX is the maximum size of an individual message, it's a global
+ * (per-namespace) limit that applies for all message queues.
+ * It's set to 1/2 of MSGMNB, to ensure that at least two messages fit into
+ * the queue. This is also an arbitrary choice (since 2.6.0).
*/
-#define MSG_MEM_SCALE 32
-#define MSGMNI 16 /* <= IPCMNI */ /* max # of msg queue identifiers */
+#define MSGMNI 32000 /* <= IPCMNI */ /* max # of msg queue identifiers */
#define MSGMAX 8192 /* <= INT_MAX */ /* max size of message (bytes) */
#define MSGMNB 16384 /* <= INT_MAX */ /* default max size of a message queue */
diff --git a/include/uapi/linux/sem.h b/include/uapi/linux/sem.h
index 541fce03b50c..dd73b908b2f3 100644
--- a/include/uapi/linux/sem.h
+++ b/include/uapi/linux/sem.h
@@ -63,10 +63,22 @@ struct seminfo {
int semaem;
};
-#define SEMMNI 128 /* <= IPCMNI max # of semaphore identifiers */
-#define SEMMSL 250 /* <= 8 000 max num of semaphores per id */
+/*
+ * SEMMNI, SEMMSL and SEMMNS are default values which can be
+ * modified by sysctl.
+ * The values have been chosen to be larger than necessary for any
+ * known configuration.
+ *
+ * SEMOPM should not be increased beyond 1000, otherwise there is the
+ * risk that semop()/semtimedop() fails due to kernel memory fragmentation when
+ * allocating the sop array.
+ */
+
+
+#define SEMMNI 32000 /* <= IPCMNI max # of semaphore identifiers */
+#define SEMMSL 32000 /* <= INT_MAX max num of semaphores per id */
#define SEMMNS (SEMMNI*SEMMSL) /* <= INT_MAX max # of semaphores in system */
-#define SEMOPM 32 /* <= 1 000 max num of ops per semop call */
+#define SEMOPM 500 /* <= 1 000 max num of ops per semop call */
#define SEMVMX 32767 /* <= 32767 semaphore maximum value */
#define SEMAEM SEMVMX /* adjust on exit max value */