summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-mm-swap10
-rw-r--r--drivers/base/node.c12
-rw-r--r--drivers/tty/tty_ldisc.c11
-rw-r--r--fs/binfmt_misc.c2
-rw-r--r--fs/block_dev.c6
-rw-r--r--fs/mpage.c14
-rw-r--r--include/linux/buffer_head.h1
-rw-r--r--include/linux/kernel.h90
-rw-r--r--include/linux/of.h10
-rw-r--r--include/linux/thread_info.h2
-rw-r--r--kernel/fork.c4
-rw-r--r--lib/Kconfig.debug143
-rw-r--r--mm/cma.c2
-rw-r--r--mm/madvise.c7
-rw-r--r--mm/mempolicy.c7
-rw-r--r--mm/migrate.c3
-rw-r--r--mm/page_vma_mapped.c28
-rw-r--r--mm/swap_state.c41
-rw-r--r--mm/vmalloc.c6
-rw-r--r--scripts/kallsyms.c2
-rw-r--r--tools/testing/selftests/vm/userfaultfd.c25
21 files changed, 245 insertions, 181 deletions
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-swap b/Documentation/ABI/testing/sysfs-kernel-mm-swap
index 587db52084c7..94672016c268 100644
--- a/Documentation/ABI/testing/sysfs-kernel-mm-swap
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-swap
@@ -14,13 +14,3 @@ Description: Enable/disable VMA based swap readahead.
still used for tmpfs etc. other users. If set to
false, the global swap readahead algorithm will be
used for all swappable pages.
-
-What: /sys/kernel/mm/swap/vma_ra_max_order
-Date: August 2017
-Contact: Linux memory management mailing list <linux-mm@kvack.org>
-Description: The max readahead size in order for VMA based swap readahead
-
- VMA based swap readahead algorithm will readahead at
- most 1 << max_order pages for each readahead. The
- real readahead size for each readahead will be scaled
- according to the estimation algorithm.
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 3855902f2c5b..aae2402f3791 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -27,13 +27,21 @@ static struct bus_type node_subsys = {
static ssize_t node_read_cpumap(struct device *dev, bool list, char *buf)
{
+ ssize_t n;
+ cpumask_var_t mask;
struct node *node_dev = to_node(dev);
- const struct cpumask *mask = cpumask_of_node(node_dev->dev.id);
/* 2008/04/07: buf currently PAGE_SIZE, need 9 chars per 32 bits. */
BUILD_BUG_ON((NR_CPUS/32 * 9) > (PAGE_SIZE-1));
- return cpumap_print_to_pagebuf(list, buf, mask);
+ if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+ return 0;
+
+ cpumask_and(mask, cpumask_of_node(node_dev->dev.id), cpu_online_mask);
+ n = cpumap_print_to_pagebuf(list, buf, mask);
+ free_cpumask_var(mask);
+
+ return n;
}
static inline ssize_t node_read_cpumask(struct device *dev,
diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c
index 2fe216b276e2..84a8ac2a779f 100644
--- a/drivers/tty/tty_ldisc.c
+++ b/drivers/tty/tty_ldisc.c
@@ -694,10 +694,8 @@ int tty_ldisc_reinit(struct tty_struct *tty, int disc)
tty_set_termios_ldisc(tty, disc);
retval = tty_ldisc_open(tty, tty->ldisc);
if (retval) {
- if (!WARN_ON(disc == N_TTY)) {
- tty_ldisc_put(tty->ldisc);
- tty->ldisc = NULL;
- }
+ tty_ldisc_put(tty->ldisc);
+ tty->ldisc = NULL;
}
return retval;
}
@@ -752,8 +750,9 @@ void tty_ldisc_hangup(struct tty_struct *tty, bool reinit)
if (tty->ldisc) {
if (reinit) {
- if (tty_ldisc_reinit(tty, tty->termios.c_line) < 0)
- tty_ldisc_reinit(tty, N_TTY);
+ if (tty_ldisc_reinit(tty, tty->termios.c_line) < 0 &&
+ tty_ldisc_reinit(tty, N_TTY) < 0)
+ WARN_ON(tty_ldisc_reinit(tty, N_NULL) < 0);
} else
tty_ldisc_kill(tty);
}
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 2a46762def31..a7c5a9861bef 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -596,7 +596,7 @@ static void bm_evict_inode(struct inode *inode)
{
Node *e = inode->i_private;
- if (e->flags & MISC_FMT_OPEN_FILE)
+ if (e && e->flags & MISC_FMT_OPEN_FILE)
filp_close(e->interp_file, NULL);
clear_inode(inode);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 93d088ffc05c..789f55e851ae 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -716,10 +716,12 @@ int bdev_write_page(struct block_device *bdev, sector_t sector,
set_page_writeback(page);
result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, true);
- if (result)
+ if (result) {
end_page_writeback(page);
- else
+ } else {
+ clean_page_buffers(page);
unlock_page(page);
+ }
blk_queue_exit(bdev->bd_queue);
return result;
}
diff --git a/fs/mpage.c b/fs/mpage.c
index 37bb77c1302c..c991faec70b9 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -468,6 +468,16 @@ static void clean_buffers(struct page *page, unsigned first_unmapped)
try_to_free_buffers(page);
}
+/*
+ * For situations where we want to clean all buffers attached to a page.
+ * We don't need to calculate how many buffers are attached to the page,
+ * we just need to specify a number larger than the maximum number of buffers.
+ */
+void clean_page_buffers(struct page *page)
+{
+ clean_buffers(page, ~0U);
+}
+
static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
void *data)
{
@@ -605,10 +615,8 @@ alloc_new:
if (bio == NULL) {
if (first_unmapped == blocks_per_page) {
if (!bdev_write_page(bdev, blocks[0] << (blkbits - 9),
- page, wbc)) {
- clean_buffers(page, first_unmapped);
+ page, wbc))
goto out;
- }
}
bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
BIO_MAX_PAGES, GFP_NOFS|__GFP_HIGH);
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index c8dae555eccf..446b24cac67d 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -232,6 +232,7 @@ int generic_write_end(struct file *, struct address_space *,
loff_t, unsigned, unsigned,
struct page *, void *);
void page_zero_new_buffers(struct page *page, unsigned from, unsigned to);
+void clean_page_buffers(struct page *page);
int cont_write_begin(struct file *, struct address_space *, loff_t,
unsigned, unsigned, struct page **, void **,
get_block_t *, loff_t *);
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 0ad4c3044cf9..91189bb0c818 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -44,6 +44,12 @@
#define STACK_MAGIC 0xdeadbeef
+/**
+ * REPEAT_BYTE - repeat the value @x multiple times as an unsigned long value
+ * @x: value to repeat
+ *
+ * NOTE: @x is not checked for > 0xff; larger values produce odd results.
+ */
#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x))
/* @a is a power of 2 value */
@@ -57,6 +63,10 @@
#define READ 0
#define WRITE 1
+/**
+ * ARRAY_SIZE - get the number of elements in array @arr
+ * @arr: array to be sized
+ */
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
#define u64_to_user_ptr(x) ( \
@@ -76,7 +86,15 @@
#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
#define round_down(x, y) ((x) & ~__round_mask(x, y))
+/**
+ * FIELD_SIZEOF - get the size of a struct's field
+ * @t: the target struct
+ * @f: the target struct's field
+ * Return: the size of @f in the struct definition without having a
+ * declared instance of @t.
+ */
#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
+
#define DIV_ROUND_UP __KERNEL_DIV_ROUND_UP
#define DIV_ROUND_DOWN_ULL(ll, d) \
@@ -107,7 +125,7 @@
/*
* Divide positive or negative dividend by positive or negative divisor
* and round to closest integer. Result is undefined for negative
- * divisors if he dividend variable type is unsigned and for negative
+ * divisors if the dividend variable type is unsigned and for negative
* dividends if the divisor variable type is unsigned.
*/
#define DIV_ROUND_CLOSEST(x, divisor)( \
@@ -247,13 +265,13 @@ extern int _cond_resched(void);
* @ep_ro: right open interval endpoint
*
* Perform a "reciprocal multiplication" in order to "scale" a value into
- * range [0, ep_ro), where the upper interval endpoint is right-open.
+ * range [0, @ep_ro), where the upper interval endpoint is right-open.
* This is useful, e.g. for accessing a index of an array containing
- * ep_ro elements, for example. Think of it as sort of modulus, only that
+ * @ep_ro elements, for example. Think of it as sort of modulus, only that
* the result isn't that of modulo. ;) Note that if initial input is a
* small value, then result will return 0.
*
- * Return: a result based on val in interval [0, ep_ro).
+ * Return: a result based on @val in interval [0, @ep_ro).
*/
static inline u32 reciprocal_scale(u32 val, u32 ep_ro)
{
@@ -618,8 +636,8 @@ do { \
* trace_printk - printf formatting in the ftrace buffer
* @fmt: the printf format for printing
*
- * Note: __trace_printk is an internal function for trace_printk and
- * the @ip is passed in via the trace_printk macro.
+ * Note: __trace_printk is an internal function for trace_printk() and
+ * the @ip is passed in via the trace_printk() macro.
*
* This function allows a kernel developer to debug fast path sections
* that printk is not appropriate for. By scattering in various
@@ -629,7 +647,7 @@ do { \
* This is intended as a debugging tool for the developer only.
* Please refrain from leaving trace_printks scattered around in
* your code. (Extra memory is used for special buffers that are
- * allocated when trace_printk() is used)
+ * allocated when trace_printk() is used.)
*
* A little optization trick is done here. If there's only one
* argument, there's no need to scan the string for printf formats.
@@ -681,7 +699,7 @@ int __trace_printk(unsigned long ip, const char *fmt, ...);
* the @ip is passed in via the trace_puts macro.
*
* This is similar to trace_printk() but is made for those really fast
- * paths that a developer wants the least amount of "Heisenbug" affects,
+ * paths that a developer wants the least amount of "Heisenbug" effects,
* where the processing of the print format is still too much.
*
* This function allows a kernel developer to debug fast path sections
@@ -692,7 +710,7 @@ int __trace_printk(unsigned long ip, const char *fmt, ...);
* This is intended as a debugging tool for the developer only.
* Please refrain from leaving trace_puts scattered around in
* your code. (Extra memory is used for special buffers that are
- * allocated when trace_puts() is used)
+ * allocated when trace_puts() is used.)
*
* Returns: 0 if nothing was written, positive # if string was.
* (1 when __trace_bputs is used, strlen(str) when __trace_puts is used)
@@ -771,6 +789,12 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
t2 min2 = (y); \
(void) (&min1 == &min2); \
min1 < min2 ? min1 : min2; })
+
+/**
+ * min - return minimum of two values of the same or compatible types
+ * @x: first value
+ * @y: second value
+ */
#define min(x, y) \
__min(typeof(x), typeof(y), \
__UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \
@@ -781,12 +805,31 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
t2 max2 = (y); \
(void) (&max1 == &max2); \
max1 > max2 ? max1 : max2; })
+
+/**
+ * max - return maximum of two values of the same or compatible types
+ * @x: first value
+ * @y: second value
+ */
#define max(x, y) \
__max(typeof(x), typeof(y), \
__UNIQUE_ID(max1_), __UNIQUE_ID(max2_), \
x, y)
+/**
+ * min3 - return minimum of three values
+ * @x: first value
+ * @y: second value
+ * @z: third value
+ */
#define min3(x, y, z) min((typeof(x))min(x, y), z)
+
+/**
+ * max3 - return maximum of three values
+ * @x: first value
+ * @y: second value
+ * @z: third value
+ */
#define max3(x, y, z) max((typeof(x))max(x, y), z)
/**
@@ -805,8 +848,8 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
* @lo: lowest allowable value
* @hi: highest allowable value
*
- * This macro does strict typechecking of lo/hi to make sure they are of the
- * same type as val. See the unnecessary pointer comparisons.
+ * This macro does strict typechecking of @lo/@hi to make sure they are of the
+ * same type as @val. See the unnecessary pointer comparisons.
*/
#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
@@ -816,11 +859,24 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
*
* Or not use min/max/clamp at all, of course.
*/
+
+/**
+ * min_t - return minimum of two values, using the specified type
+ * @type: data type to use
+ * @x: first value
+ * @y: second value
+ */
#define min_t(type, x, y) \
__min(type, type, \
__UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \
x, y)
+/**
+ * max_t - return maximum of two values, using the specified type
+ * @type: data type to use
+ * @x: first value
+ * @y: second value
+ */
#define max_t(type, x, y) \
__max(type, type, \
__UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \
@@ -834,7 +890,7 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
* @hi: maximum allowable value
*
* This macro does no typechecking and uses temporary variables of type
- * 'type' to make all the comparisons.
+ * @type to make all the comparisons.
*/
#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi)
@@ -845,15 +901,17 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
* @hi: maximum allowable value
*
* This macro does no typechecking and uses temporary variables of whatever
- * type the input argument 'val' is. This is useful when val is an unsigned
- * type and min and max are literals that will otherwise be assigned a signed
+ * type the input argument @val is. This is useful when @val is an unsigned
+ * type and @lo and @hi are literals that will otherwise be assigned a signed
* integer type.
*/
#define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi)
-/*
- * swap - swap value of @a and @b
+/**
+ * swap - swap values of @a and @b
+ * @a: first value
+ * @b: second value
*/
#define swap(a, b) \
do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
diff --git a/include/linux/of.h b/include/linux/of.h
index cfc34117fc92..b240ed69dc96 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -734,6 +734,16 @@ static inline struct device_node *of_get_cpu_node(int cpu,
return NULL;
}
+static inline int of_n_addr_cells(struct device_node *np)
+{
+ return 0;
+
+}
+static inline int of_n_size_cells(struct device_node *np)
+{
+ return 0;
+}
+
static inline int of_property_read_u64(const struct device_node *np,
const char *propname, u64 *out_value)
{
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index 905d769d8ddc..5f7eeab990fe 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -42,7 +42,7 @@ enum {
#define THREAD_ALIGN THREAD_SIZE
#endif
-#ifdef CONFIG_DEBUG_STACK_USAGE
+#if IS_ENABLED(CONFIG_DEBUG_STACK_USAGE) || IS_ENABLED(CONFIG_DEBUG_KMEMLEAK)
# define THREADINFO_GFP (GFP_KERNEL_ACCOUNT | __GFP_NOTRACK | \
__GFP_ZERO)
#else
diff --git a/kernel/fork.c b/kernel/fork.c
index e702cb9ffbd8..07cc743698d3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -215,6 +215,10 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
if (!s)
continue;
+#ifdef CONFIG_DEBUG_KMEMLEAK
+ /* Clear stale pointers from reused stack. */
+ memset(s->addr, 0, THREAD_SIZE);
+#endif
tsk->stack_vm_area = s;
return s->addr;
}
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 2689b7c50c52..c1e720a22c71 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1590,6 +1590,54 @@ config LATENCYTOP
source kernel/trace/Kconfig
+config PROVIDE_OHCI1394_DMA_INIT
+ bool "Remote debugging over FireWire early on boot"
+ depends on PCI && X86
+ help
+ If you want to debug problems which hang or crash the kernel early
+ on boot and the crashing machine has a FireWire port, you can use
+ this feature to remotely access the memory of the crashed machine
+ over FireWire. This employs remote DMA as part of the OHCI1394
+ specification which is now the standard for FireWire controllers.
+
+ With remote DMA, you can monitor the printk buffer remotely using
+ firescope and access all memory below 4GB using fireproxy from gdb.
+ Even controlling a kernel debugger is possible using remote DMA.
+
+ Usage:
+
+ If ohci1394_dma=early is used as boot parameter, it will initialize
+ all OHCI1394 controllers which are found in the PCI config space.
+
+ As all changes to the FireWire bus such as enabling and disabling
+ devices cause a bus reset and thereby disable remote DMA for all
+ devices, be sure to have the cable plugged and FireWire enabled on
+ the debugging host before booting the debug target for debugging.
+
+ This code (~1k) is freed after boot. By then, the firewire stack
+ in charge of the OHCI-1394 controllers should be used instead.
+
+ See Documentation/debugging-via-ohci1394.txt for more information.
+
+config DMA_API_DEBUG
+ bool "Enable debugging of DMA-API usage"
+ depends on HAVE_DMA_API_DEBUG
+ help
+ Enable this option to debug the use of the DMA API by device drivers.
+ With this option you will be able to detect common bugs in device
+ drivers like double-freeing of DMA mappings or freeing mappings that
+ were never allocated.
+
+ This also attempts to catch cases where a page owned by DMA is
+ accessed by the cpu in a way that could cause data corruption. For
+ example, this enables cow_user_page() to check that the source page is
+ not undergoing DMA.
+
+ This option causes a performance degradation. Use only if you want to
+ debug device drivers and dma interactions.
+
+ If unsure, say N.
+
menu "Runtime Testing"
config LKDTM
@@ -1749,56 +1797,6 @@ config TEST_PARMAN
If unsure, say N.
-endmenu # runtime tests
-
-config PROVIDE_OHCI1394_DMA_INIT
- bool "Remote debugging over FireWire early on boot"
- depends on PCI && X86
- help
- If you want to debug problems which hang or crash the kernel early
- on boot and the crashing machine has a FireWire port, you can use
- this feature to remotely access the memory of the crashed machine
- over FireWire. This employs remote DMA as part of the OHCI1394
- specification which is now the standard for FireWire controllers.
-
- With remote DMA, you can monitor the printk buffer remotely using
- firescope and access all memory below 4GB using fireproxy from gdb.
- Even controlling a kernel debugger is possible using remote DMA.
-
- Usage:
-
- If ohci1394_dma=early is used as boot parameter, it will initialize
- all OHCI1394 controllers which are found in the PCI config space.
-
- As all changes to the FireWire bus such as enabling and disabling
- devices cause a bus reset and thereby disable remote DMA for all
- devices, be sure to have the cable plugged and FireWire enabled on
- the debugging host before booting the debug target for debugging.
-
- This code (~1k) is freed after boot. By then, the firewire stack
- in charge of the OHCI-1394 controllers should be used instead.
-
- See Documentation/debugging-via-ohci1394.txt for more information.
-
-config DMA_API_DEBUG
- bool "Enable debugging of DMA-API usage"
- depends on HAVE_DMA_API_DEBUG
- help
- Enable this option to debug the use of the DMA API by device drivers.
- With this option you will be able to detect common bugs in device
- drivers like double-freeing of DMA mappings or freeing mappings that
- were never allocated.
-
- This also attempts to catch cases where a page owned by DMA is
- accessed by the cpu in a way that could cause data corruption. For
- example, this enables cow_user_page() to check that the source page is
- not undergoing DMA.
-
- This option causes a performance degradation. Use only if you want to
- debug device drivers and dma interactions.
-
- If unsure, say N.
-
config TEST_LKM
tristate "Test module loading with 'hello world' module"
default n
@@ -1873,18 +1871,6 @@ config TEST_UDELAY
If unsure, say N.
-config MEMTEST
- bool "Memtest"
- depends on HAVE_MEMBLOCK
- ---help---
- This option adds a kernel parameter 'memtest', which allows memtest
- to be set.
- memtest=0, mean disabled; -- default
- memtest=1, mean do 1 test pattern;
- ...
- memtest=17, mean do 17 test patterns.
- If you are unsure how to answer this question, answer N.
-
config TEST_STATIC_KEYS
tristate "Test static keys"
default n
@@ -1894,16 +1880,6 @@ config TEST_STATIC_KEYS
If unsure, say N.
-config BUG_ON_DATA_CORRUPTION
- bool "Trigger a BUG when data corruption is detected"
- select DEBUG_LIST
- help
- Select this option if the kernel should BUG when it encounters
- data corruption in kernel memory structures when they get checked
- for validity.
-
- If unsure, say N.
-
config TEST_KMOD
tristate "kmod stress tester"
default n
@@ -1941,6 +1917,29 @@ config TEST_DEBUG_VIRTUAL
If unsure, say N.
+endmenu # runtime tests
+
+config MEMTEST
+ bool "Memtest"
+ depends on HAVE_MEMBLOCK
+ ---help---
+ This option adds a kernel parameter 'memtest', which allows memtest
+ to be set.
+ memtest=0, mean disabled; -- default
+ memtest=1, mean do 1 test pattern;
+ ...
+ memtest=17, mean do 17 test patterns.
+ If you are unsure how to answer this question, answer N.
+
+config BUG_ON_DATA_CORRUPTION
+ bool "Trigger a BUG when data corruption is detected"
+ select DEBUG_LIST
+ help
+ Select this option if the kernel should BUG when it encounters
+ data corruption in kernel memory structures when they get checked
+ for validity.
+
+ If unsure, say N.
source "samples/Kconfig"
diff --git a/mm/cma.c b/mm/cma.c
index c0da318c020e..022e52bd8370 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -460,7 +460,7 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align,
trace_cma_alloc(pfn, page, count, align);
- if (ret) {
+ if (ret && !(gfp_mask & __GFP_NOWARN)) {
pr_info("%s: alloc failed, req-size: %zu pages, ret: %d\n",
__func__, count, ret);
cma_debug_show_areas(cma);
diff --git a/mm/madvise.c b/mm/madvise.c
index 25bade36e9ca..fd70d6aabc3e 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -757,6 +757,9 @@ madvise_behavior_valid(int behavior)
* MADV_DONTFORK - omit this area from child's address space when forking:
* typically, to avoid COWing pages pinned by get_user_pages().
* MADV_DOFORK - cancel MADV_DONTFORK: no longer omit this area when forking.
+ * MADV_WIPEONFORK - present the child process with zero-filled memory in this
+ * range after a fork.
+ * MADV_KEEPONFORK - undo the effect of MADV_WIPEONFORK
* MADV_HWPOISON - trigger memory error handler as if the given memory range
* were corrupted by unrecoverable hardware memory failure.
* MADV_SOFT_OFFLINE - try to soft-offline the given range of memory.
@@ -777,7 +780,9 @@ madvise_behavior_valid(int behavior)
* zero - success
* -EINVAL - start + len < 0, start is not page-aligned,
* "behavior" is not a valid value, or application
- * is attempting to release locked or shared pages.
+ * is attempting to release locked or shared pages,
+ * or the specified address range includes file, Huge TLB,
+ * MAP_SHARED or VMPFNMAP range.
* -ENOMEM - addresses in the specified range are not currently
* mapped, or are outside the AS of the process.
* -EIO - an I/O error occurred while paging in data.
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 006ba625c0b8..a2af6d58a68f 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1920,8 +1920,11 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
struct page *page;
page = __alloc_pages(gfp, order, nid);
- if (page && page_to_nid(page) == nid)
- inc_zone_page_state(page, NUMA_INTERLEAVE_HIT);
+ if (page && page_to_nid(page) == nid) {
+ preempt_disable();
+ __inc_numa_state(page_zone(page), NUMA_INTERLEAVE_HIT);
+ preempt_enable();
+ }
return page;
}
diff --git a/mm/migrate.c b/mm/migrate.c
index 6954c1435833..e00814ca390e 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2146,8 +2146,9 @@ static int migrate_vma_collect_hole(unsigned long start,
unsigned long addr;
for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
- migrate->src[migrate->npages++] = MIGRATE_PFN_MIGRATE;
+ migrate->src[migrate->npages] = MIGRATE_PFN_MIGRATE;
migrate->dst[migrate->npages] = 0;
+ migrate->npages++;
migrate->cpages++;
}
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index 6a03946469a9..53afbb919a1c 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -6,17 +6,6 @@
#include "internal.h"
-static inline bool check_pmd(struct page_vma_mapped_walk *pvmw)
-{
- pmd_t pmde;
- /*
- * Make sure we don't re-load pmd between present and !trans_huge check.
- * We need a consistent view.
- */
- pmde = READ_ONCE(*pvmw->pmd);
- return pmd_present(pmde) && !pmd_trans_huge(pmde);
-}
-
static inline bool not_found(struct page_vma_mapped_walk *pvmw)
{
page_vma_mapped_walk_done(pvmw);
@@ -116,6 +105,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
+ pmd_t pmde;
/* The only possible pmd mapping has been handled on last iteration */
if (pvmw->pmd && !pvmw->pte)
@@ -148,7 +138,13 @@ restart:
if (!pud_present(*pud))
return false;
pvmw->pmd = pmd_offset(pud, pvmw->address);
- if (pmd_trans_huge(*pvmw->pmd) || is_pmd_migration_entry(*pvmw->pmd)) {
+ /*
+ * Make sure the pmd value isn't cached in a register by the
+ * compiler and used as a stale value after we've observed a
+ * subsequent update.
+ */
+ pmde = READ_ONCE(*pvmw->pmd);
+ if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
pvmw->ptl = pmd_lock(mm, pvmw->pmd);
if (likely(pmd_trans_huge(*pvmw->pmd))) {
if (pvmw->flags & PVMW_MIGRATION)
@@ -167,17 +163,15 @@ restart:
return not_found(pvmw);
return true;
}
- } else
- WARN_ONCE(1, "Non present huge pmd without pmd migration enabled!");
+ }
return not_found(pvmw);
} else {
/* THP pmd was split under us: handle on pte level */
spin_unlock(pvmw->ptl);
pvmw->ptl = NULL;
}
- } else {
- if (!check_pmd(pvmw))
- return false;
+ } else if (!pmd_present(pmde)) {
+ return false;
}
if (!map_pte(pvmw))
goto next_pte;
diff --git a/mm/swap_state.c b/mm/swap_state.c
index ed91091d1e68..05b6803f0cce 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -39,10 +39,6 @@ struct address_space *swapper_spaces[MAX_SWAPFILES];
static unsigned int nr_swapper_spaces[MAX_SWAPFILES];
bool swap_vma_readahead = true;
-#define SWAP_RA_MAX_ORDER_DEFAULT 3
-
-static int swap_ra_max_order = SWAP_RA_MAX_ORDER_DEFAULT;
-
#define SWAP_RA_WIN_SHIFT (PAGE_SHIFT / 2)
#define SWAP_RA_HITS_MASK ((1UL << SWAP_RA_WIN_SHIFT) - 1)
#define SWAP_RA_HITS_MAX SWAP_RA_HITS_MASK
@@ -664,6 +660,13 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
pte_t *tpte;
#endif
+ max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster),
+ SWAP_RA_ORDER_CEILING);
+ if (max_win == 1) {
+ swap_ra->win = 1;
+ return NULL;
+ }
+
faddr = vmf->address;
entry = pte_to_swp_entry(vmf->orig_pte);
if ((unlikely(non_swap_entry(entry))))
@@ -672,12 +675,6 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
if (page)
return page;
- max_win = 1 << READ_ONCE(swap_ra_max_order);
- if (max_win == 1) {
- swap_ra->win = 1;
- return NULL;
- }
-
fpfn = PFN_DOWN(faddr);
swap_ra_info = GET_SWAP_RA_VAL(vma);
pfn = PFN_DOWN(SWAP_RA_ADDR(swap_ra_info));
@@ -786,32 +783,8 @@ static struct kobj_attribute vma_ra_enabled_attr =
__ATTR(vma_ra_enabled, 0644, vma_ra_enabled_show,
vma_ra_enabled_store);
-static ssize_t vma_ra_max_order_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buf)
-{
- return sprintf(buf, "%d\n", swap_ra_max_order);
-}
-static ssize_t vma_ra_max_order_store(struct kobject *kobj,
- struct kobj_attribute *attr,
- const char *buf, size_t count)
-{
- int err, v;
-
- err = kstrtoint(buf, 10, &v);
- if (err || v > SWAP_RA_ORDER_CEILING || v <= 0)
- return -EINVAL;
-
- swap_ra_max_order = v;
-
- return count;
-}
-static struct kobj_attribute vma_ra_max_order_attr =
- __ATTR(vma_ra_max_order, 0644, vma_ra_max_order_show,
- vma_ra_max_order_store);
-
static struct attribute *swap_attrs[] = {
&vma_ra_enabled_attr.attr,
- &vma_ra_max_order_attr.attr,
NULL,
};
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 8a43db6284eb..673942094328 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1695,11 +1695,6 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
for (i = 0; i < area->nr_pages; i++) {
struct page *page;
- if (fatal_signal_pending(current)) {
- area->nr_pages = i;
- goto fail_no_warn;
- }
-
if (node == NUMA_NO_NODE)
page = alloc_page(alloc_mask|highmem_mask);
else
@@ -1723,7 +1718,6 @@ fail:
warn_alloc(gfp_mask, NULL,
"vmalloc: allocation failure, allocated %ld of %ld bytes",
(area->nr_pages*PAGE_SIZE), area->size);
-fail_no_warn:
vfree(area->addr);
return NULL;
}
diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
index 5d554419170b..9ee9bf7fd1a2 100644
--- a/scripts/kallsyms.c
+++ b/scripts/kallsyms.c
@@ -158,7 +158,7 @@ static int read_symbol(FILE *in, struct sym_entry *s)
else if (str[0] == '$')
return -1;
/* exclude debugging symbols */
- else if (stype == 'N')
+ else if (stype == 'N' || stype == 'n')
return -1;
/* include the type field in the symbol name, so that it gets
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index a2c53a3d223d..de2f9ec8a87f 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -397,7 +397,7 @@ static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy,
}
}
-static int copy_page(int ufd, unsigned long offset)
+static int __copy_page(int ufd, unsigned long offset, bool retry)
{
struct uffdio_copy uffdio_copy;
@@ -418,7 +418,7 @@ static int copy_page(int ufd, unsigned long offset)
fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n",
uffdio_copy.copy), exit(1);
} else {
- if (test_uffdio_copy_eexist) {
+ if (test_uffdio_copy_eexist && retry) {
test_uffdio_copy_eexist = false;
retry_copy_page(ufd, &uffdio_copy, offset);
}
@@ -427,6 +427,16 @@ static int copy_page(int ufd, unsigned long offset)
return 0;
}
+static int copy_page_retry(int ufd, unsigned long offset)
+{
+ return __copy_page(ufd, offset, true);
+}
+
+static int copy_page(int ufd, unsigned long offset)
+{
+ return __copy_page(ufd, offset, false);
+}
+
static void *uffd_poll_thread(void *arg)
{
unsigned long cpu = (unsigned long) arg;
@@ -544,7 +554,7 @@ static void *background_thread(void *arg)
for (page_nr = cpu * nr_pages_per_cpu;
page_nr < (cpu+1) * nr_pages_per_cpu;
page_nr++)
- copy_page(uffd, page_nr * page_size);
+ copy_page_retry(uffd, page_nr * page_size);
return NULL;
}
@@ -779,7 +789,7 @@ static void retry_uffdio_zeropage(int ufd,
}
}
-static int uffdio_zeropage(int ufd, unsigned long offset)
+static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
{
struct uffdio_zeropage uffdio_zeropage;
int ret;
@@ -814,7 +824,7 @@ static int uffdio_zeropage(int ufd, unsigned long offset)
fprintf(stderr, "UFFDIO_ZEROPAGE unexpected %Ld\n",
uffdio_zeropage.zeropage), exit(1);
} else {
- if (test_uffdio_zeropage_eexist) {
+ if (test_uffdio_zeropage_eexist && retry) {
test_uffdio_zeropage_eexist = false;
retry_uffdio_zeropage(ufd, &uffdio_zeropage,
offset);
@@ -830,6 +840,11 @@ static int uffdio_zeropage(int ufd, unsigned long offset)
return 0;
}
+static int uffdio_zeropage(int ufd, unsigned long offset)
+{
+ return __uffdio_zeropage(ufd, offset, false);
+}
+
/* exercise UFFDIO_ZEROPAGE */
static int userfaultfd_zeropage_test(void)
{