From 70f3ce0510afdad7cbaf27ab7ab961377205c782 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 29 Sep 2014 11:47:54 +0200 Subject: mtd: spi-nor: make spi_nor_scan() take a chip type name, not spi_device_id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drivers currently call spi_nor_match_id() and then spi_nor_scan(). This adds a dependency on struct spi_device_id which we want to avoid. Make spi_nor_scan() do it for them. Signed-off-by: Ben Hutchings Signed-off-by: Rafał Miłecki Signed-off-by: Brian Norris --- include/linux/mtd/spi-nor.h | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 9e6294f32ba8..a5a7a086748d 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -187,32 +187,18 @@ struct spi_nor { /** * spi_nor_scan() - scan the SPI NOR * @nor: the spi_nor structure - * @id: the spi_device_id provided by the driver + * @name: the chip type name * @mode: the read mode supported by the driver * * The drivers can use this fuction to scan the SPI NOR. * In the scanning, it will try to get all the necessary information to * fill the mtd_info{} and the spi_nor{}. * - * The board may assigns a spi_device_id with @id which be used to compared with - * the spi_device_id detected by the scanning. + * The chip type name can be provided through the @name parameter. * * Return: 0 for success, others for failure. */ -int spi_nor_scan(struct spi_nor *nor, const struct spi_device_id *id, - enum read_mode mode); +int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode); extern const struct spi_device_id spi_nor_ids[]; -/** - * spi_nor_match_id() - find the spi_device_id by the name - * @name: the name of the spi_device_id - * - * The drivers use this function to find the spi_device_id - * specified by the @name. - * - * Return: returns the right spi_device_id pointer on success, - * and returns NULL on failure. - */ -const struct spi_device_id *spi_nor_match_id(char *name); - #endif -- cgit v1.2.3 From a5b7616c55e188fe3d6ef686bef402d4703ecb62 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 30 Sep 2014 03:14:55 +0100 Subject: mtd: m25p80,spi-nor: Fix module aliases for m25p80 m25p80's device ID table is now spi_nor_ids, defined in spi-nor. The MODULE_DEVICE_TABLE() macro doesn't work with extern definitions, but its use was also removed at the same time. Now if m25p80 is built as a module it doesn't get the necessary aliases to be loaded automatically. A clean solution to this will involve defining the list of device IDs in spi-nor.h and removing struct spi_device_id from the spi-nor API, but this is quite a large change. As a quick fix suitable for stable, copy the device IDs back into m25p80. Fixes: 03e296f613af ("mtd: m25p80: use the SPI nor framework") Cc: # 3.16.x: 32f1b7c8352f: mtd: move support for struct flash_platform_data into m25p80 Cc: # 3.16.x: 90e55b3812a1: mtd: m25p80: get rid of spi_get_device_id Cc: # 3.16.x: 70f3ce0510af: mtd: spi-nor: make spi_nor_scan() take a chip type name, not spi_device_id Cc: # 3.16.x Signed-off-by: Ben Hutchings Signed-off-by: Brian Norris --- drivers/mtd/devices/m25p80.c | 52 ++++++++++++++++++++++++++++++++++++++++++- drivers/mtd/spi-nor/spi-nor.c | 3 +-- include/linux/mtd/spi-nor.h | 1 - 3 files changed, 52 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/devices/m25p80.c b/drivers/mtd/devices/m25p80.c index bd5e4c6edfd4..ed827cf894e4 100644 --- a/drivers/mtd/devices/m25p80.c +++ b/drivers/mtd/devices/m25p80.c @@ -261,12 +261,62 @@ static int m25p_remove(struct spi_device *spi) } +/* + * XXX This needs to be kept in sync with spi_nor_ids. We can't share + * it with spi-nor, because if this is built as a module then modpost + * won't be able to read it and add appropriate aliases. + */ +static const struct spi_device_id m25p_ids[] = { + {"at25fs010"}, {"at25fs040"}, {"at25df041a"}, {"at25df321a"}, + {"at25df641"}, {"at26f004"}, {"at26df081a"}, {"at26df161a"}, + {"at26df321"}, {"at45db081d"}, + {"en25f32"}, {"en25p32"}, {"en25q32b"}, {"en25p64"}, + {"en25q64"}, {"en25qh128"}, {"en25qh256"}, + {"f25l32pa"}, + {"mr25h256"}, {"mr25h10"}, + {"gd25q32"}, {"gd25q64"}, + {"160s33b"}, {"320s33b"}, {"640s33b"}, + {"mx25l2005a"}, {"mx25l4005a"}, {"mx25l8005"}, {"mx25l1606e"}, + {"mx25l3205d"}, {"mx25l3255e"}, {"mx25l6405d"}, {"mx25l12805d"}, + {"mx25l12855e"},{"mx25l25635e"},{"mx25l25655e"},{"mx66l51235l"}, + {"mx66l1g55g"}, + {"n25q064"}, {"n25q128a11"}, {"n25q128a13"}, {"n25q256a"}, + {"n25q512a"}, {"n25q512ax3"}, {"n25q00"}, + {"pm25lv512"}, {"pm25lv010"}, {"pm25lq032"}, + {"s25sl032p"}, {"s25sl064p"}, {"s25fl256s0"}, {"s25fl256s1"}, + {"s25fl512s"}, {"s70fl01gs"}, {"s25sl12800"}, {"s25sl12801"}, + {"s25fl129p0"}, {"s25fl129p1"}, {"s25sl004a"}, {"s25sl008a"}, + {"s25sl016a"}, {"s25sl032a"}, {"s25sl064a"}, {"s25fl008k"}, + {"s25fl016k"}, {"s25fl064k"}, + {"sst25vf040b"},{"sst25vf080b"},{"sst25vf016b"},{"sst25vf032b"}, + {"sst25vf064c"},{"sst25wf512"}, {"sst25wf010"}, {"sst25wf020"}, + {"sst25wf040"}, + {"m25p05"}, {"m25p10"}, {"m25p20"}, {"m25p40"}, + {"m25p80"}, {"m25p16"}, {"m25p32"}, {"m25p64"}, + {"m25p128"}, {"n25q032"}, + {"m25p05-nonjedec"}, {"m25p10-nonjedec"}, {"m25p20-nonjedec"}, + {"m25p40-nonjedec"}, {"m25p80-nonjedec"}, {"m25p16-nonjedec"}, + {"m25p32-nonjedec"}, {"m25p64-nonjedec"}, {"m25p128-nonjedec"}, + {"m45pe10"}, {"m45pe80"}, {"m45pe16"}, + {"m25pe20"}, {"m25pe80"}, {"m25pe16"}, + {"m25px16"}, {"m25px32"}, {"m25px32-s0"}, {"m25px32-s1"}, + {"m25px64"}, + {"w25x10"}, {"w25x20"}, {"w25x40"}, {"w25x80"}, + {"w25x16"}, {"w25x32"}, {"w25q32"}, {"w25q32dw"}, + {"w25x64"}, {"w25q64"}, {"w25q128"}, {"w25q80"}, + {"w25q80bl"}, {"w25q128"}, {"w25q256"}, {"cat25c11"}, + {"cat25c03"}, {"cat25c09"}, {"cat25c17"}, {"cat25128"}, + { }, +}; +MODULE_DEVICE_TABLE(spi, m25p_ids); + + static struct spi_driver m25p80_driver = { .driver = { .name = "m25p80", .owner = THIS_MODULE, }, - .id_table = spi_nor_ids, + .id_table = m25p_ids, .probe = m25p_probe, .remove = m25p_remove, diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 5c8e39977bc5..c51ee52386a7 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -475,7 +475,7 @@ struct flash_info { * more nor chips. This current list focusses on newer chips, which * have been converging on command sets which including JEDEC ID. */ -const struct spi_device_id spi_nor_ids[] = { +static const struct spi_device_id spi_nor_ids[] = { /* Atmel -- some are (confusingly) marketed as "DataFlash" */ { "at25fs010", INFO(0x1f6601, 0, 32 * 1024, 4, SECT_4K) }, { "at25fs040", INFO(0x1f6604, 0, 64 * 1024, 8, SECT_4K) }, @@ -639,7 +639,6 @@ const struct spi_device_id spi_nor_ids[] = { { "cat25128", CAT25_INFO(2048, 8, 64, 2, SPI_NOR_NO_ERASE | SPI_NOR_NO_FR) }, { }, }; -EXPORT_SYMBOL_GPL(spi_nor_ids); static const struct spi_device_id *spi_nor_read_id(struct spi_nor *nor) { diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index a5a7a086748d..046a0a2e4c4e 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -199,6 +199,5 @@ struct spi_nor { * Return: 0 for success, others for failure. */ int spi_nor_scan(struct spi_nor *nor, const char *name, enum read_mode mode); -extern const struct spi_device_id spi_nor_ids[]; #endif -- cgit v1.2.3 From dda02fd6278d9e995850b3c1dba484f17cbe4de4 Mon Sep 17 00:00:00 2001 From: Weijie Yang Date: Fri, 24 Oct 2014 17:47:57 +0800 Subject: mm, cma: make parameters order consistent in func declaration and definition In the current code, the base and size parameters order is not consistent in functions declaration and definition. If someone calls these functions according to the declaration parameters order in cma.h, he will run into some bug and it's hard to find the reason. This patch makes the parameters order consistent in functions declaration and definition. Signed-off-by: Weijie Yang Acked-by: Michal Nazarewicz Signed-off-by: Marek Szyprowski --- include/linux/cma.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cma.h b/include/linux/cma.h index 0430ed05d3b9..a93438beb33c 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -18,12 +18,12 @@ struct cma; extern phys_addr_t cma_get_base(struct cma *cma); extern unsigned long cma_get_size(struct cma *cma); -extern int __init cma_declare_contiguous(phys_addr_t size, - phys_addr_t base, phys_addr_t limit, +extern int __init cma_declare_contiguous(phys_addr_t base, + phys_addr_t size, phys_addr_t limit, phys_addr_t alignment, unsigned int order_per_bit, bool fixed, struct cma **res_cma); -extern int cma_init_reserved_mem(phys_addr_t size, - phys_addr_t base, int order_per_bit, +extern int cma_init_reserved_mem(phys_addr_t base, + phys_addr_t size, int order_per_bit, struct cma **res_cma); extern struct page *cma_alloc(struct cma *cma, int count, unsigned int align); extern bool cma_release(struct cma *cma, struct page *pages, int count); -- cgit v1.2.3 From e999dbc254044e8d2a5818d92d205f65bae28f37 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 19 Oct 2014 17:13:57 +0200 Subject: Revert "block: all blk-mq requests are tagged" This reverts commit fb3ccb5da71273e7f0d50b50bc879e50cedd60e7. SCSI-2/SPI actually needs the tagged/untagged flag in the request to work properly. Revert this patch and add a follow on to set it in the right place. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Acked-by: Jens Axboe Reported-by: Meelis Roos Tested-by: Meelis Roos Cc: stable@vger.kernel.org --- include/linux/blkdev.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0207a78a8d82..51d0dc2259cf 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1136,8 +1136,7 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk) /* * tag stuff */ -#define blk_rq_tagged(rq) \ - ((rq)->mq_ctx || ((rq)->cmd_flags & REQ_QUEUED)) +#define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED) extern int blk_queue_start_tag(struct request_queue *, struct request *); extern struct request *blk_queue_find_tag(struct request_queue *, int); extern void blk_queue_end_tag(struct request_queue *, struct request *); -- cgit v1.2.3 From 54ef6df3f3f1353d99c80c437259d317b2cd1cbd Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 27 Oct 2014 21:11:27 -0700 Subject: rcu: Provide counterpart to rcu_dereference() for non-RCU situations Although rcu_dereference() and friends can be used in situations where object lifetimes are being managed by something other than RCU, the resulting sparse and lockdep-RCU noise can be annoying. This commit therefore supplies a lockless_dereference(), which provides the protection for dereferences without the RCU-related debugging noise. Reported-by: Al Viro Signed-off-by: Paul E. McKenney Signed-off-by: Al Viro --- include/linux/rcupdate.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index a4a819ffb2d1..53ff1a752d7e 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -616,6 +616,21 @@ static inline void rcu_preempt_sleep_check(void) */ #define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v) +/** + * lockless_dereference() - safely load a pointer for later dereference + * @p: The pointer to load + * + * Similar to rcu_dereference(), but for situations where the pointed-to + * object's lifetime is managed by something other than RCU. That + * "something other" might be reference counting or simple immortality. + */ +#define lockless_dereference(p) \ +({ \ + typeof(p) _________p1 = ACCESS_ONCE(p); \ + smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ + (_________p1); \ +}) + /** * rcu_assign_pointer() - assign to RCU-protected pointer * @p: pointer to assign to -- cgit v1.2.3 From d1b72cc6d8cb766c802fdc70a5edc2f0ba8a2b57 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 27 Oct 2014 15:42:01 +0100 Subject: overlayfs: fix lockdep misannotation In an overlay directory that shadows an empty lower directory, say /mnt/a/empty102, do: touch /mnt/a/empty102/x unlink /mnt/a/empty102/x rmdir /mnt/a/empty102 It's actually harmless, but needs another level of nesting between I_MUTEX_CHILD and I_MUTEX_NORMAL. Signed-off-by: Miklos Szeredi Tested-by: David Howells Signed-off-by: Al Viro --- fs/namei.c | 2 +- fs/overlayfs/readdir.c | 2 +- include/linux/fs.h | 9 ++++++--- 3 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/namei.c b/fs/namei.c index 42df664e95e5..922f27068c4c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2497,7 +2497,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) } mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD); + mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT2); return NULL; } EXPORT_SYMBOL(lock_rename); diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 3fbf0d306e12..401f0840f5cc 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -571,7 +571,7 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list) { struct ovl_cache_entry *p; - mutex_lock_nested(&upper->d_inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&upper->d_inode->i_mutex, I_MUTEX_CHILD); list_for_each_entry(p, list, l_node) { struct dentry *dentry; diff --git a/include/linux/fs.h b/include/linux/fs.h index 4e41a4a331bb..01036262095f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -639,11 +639,13 @@ static inline int inode_unhashed(struct inode *inode) * 2: child/target * 3: xattr * 4: second non-directory - * The last is for certain operations (such as rename) which lock two + * 5: second parent (when locking independent directories in rename) + * + * I_MUTEX_NONDIR2 is for certain operations (such as rename) which lock two * non-directories at once. * * The locking order between these classes is - * parent -> child -> normal -> xattr -> second non-directory + * parent[2] -> child -> grandchild -> normal -> xattr -> second non-directory */ enum inode_i_mutex_lock_class { @@ -651,7 +653,8 @@ enum inode_i_mutex_lock_class I_MUTEX_PARENT, I_MUTEX_CHILD, I_MUTEX_XATTR, - I_MUTEX_NONDIR2 + I_MUTEX_NONDIR2, + I_MUTEX_PARENT2, }; void lock_two_nondirectories(struct inode *, struct inode*); -- cgit v1.2.3 From b2de525f095708b2adbadaec3f1e4017a23d1e09 Mon Sep 17 00:00:00 2001 From: David Jeffery Date: Mon, 29 Sep 2014 10:21:10 -0400 Subject: Return short read or 0 at end of a raw device, not EIO Author: David Jeffery Changes to the basic direct I/O code have broken the raw driver when reading to the end of a raw device. Instead of returning a short read for a read that extends partially beyond the device's end or 0 when at the end of the device, these reads now return EIO. The raw driver needs the same end of device handling as was added for normal block devices. Using blkdev_read_iter, which has the needed size checks, prevents the EIO conditions at the end of the device. Signed-off-by: David Jeffery Signed-off-by: Al Viro --- drivers/char/raw.c | 2 +- fs/block_dev.c | 3 ++- include/linux/fs.h | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/raw.c b/drivers/char/raw.c index 0102dc788608..a24891b97547 100644 --- a/drivers/char/raw.c +++ b/drivers/char/raw.c @@ -285,7 +285,7 @@ static long raw_ctl_compat_ioctl(struct file *file, unsigned int cmd, static const struct file_operations raw_fops = { .read = new_sync_read, - .read_iter = generic_file_read_iter, + .read_iter = blkdev_read_iter, .write = new_sync_write, .write_iter = blkdev_write_iter, .fsync = blkdev_fsync, diff --git a/fs/block_dev.c b/fs/block_dev.c index cc9d4114cda0..1d9c9f3754f8 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1585,7 +1585,7 @@ ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) } EXPORT_SYMBOL_GPL(blkdev_write_iter); -static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) +ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; struct inode *bd_inode = file->f_mapping->host; @@ -1599,6 +1599,7 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) iov_iter_truncate(to, size); return generic_file_read_iter(iocb, to); } +EXPORT_SYMBOL_GPL(blkdev_read_iter); /* * Try to release a page associated with block device when the system diff --git a/include/linux/fs.h b/include/linux/fs.h index 01036262095f..9ab779e8a63c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2469,6 +2469,7 @@ extern ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, lo extern ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); /* fs/block_dev.c */ +extern ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to); extern ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from); extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync); -- cgit v1.2.3 From a87fa1d81a9fb5e9adca9820e16008c40ad09f33 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Mon, 3 Nov 2014 15:15:35 +0000 Subject: of: Fix overflow bug in string property parsing functions The string property read helpers will run off the end of the buffer if it is handed a malformed string property. Rework the parsers to make sure that doesn't happen. At the same time add new test cases to make sure the functions behave themselves. The original implementations of of_property_read_string_index() and of_property_count_strings() both open-coded the same block of parsing code, each with it's own subtly different bugs. The fix here merges functions into a single helper and makes the original functions static inline wrappers around the helper. One non-bugfix aspect of this patch is the addition of a new wrapper, of_property_read_string_array(). The new wrapper is needed by the device_properties feature that Rafael is working on and planning to merge for v3.19. The implementation is identical both with and without the new static inline wrapper, so it just got left in to reduce the churn on the header file. Signed-off-by: Grant Likely Cc: Rafael J. Wysocki Cc: Mika Westerberg Cc: Rob Herring Cc: Arnd Bergmann Cc: Darren Hart Cc: # v3.3+: Drop selftest hunks that don't apply --- drivers/of/base.c | 88 ++++++++--------------------- drivers/of/selftest.c | 66 ++++++++++++++++++++-- drivers/of/testcase-data/tests-phandle.dtsi | 2 + include/linux/of.h | 84 ++++++++++++++++++++++----- 4 files changed, 154 insertions(+), 86 deletions(-) (limited to 'include/linux') diff --git a/drivers/of/base.c b/drivers/of/base.c index 2305dc0382bc..3823edf2d012 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -1279,52 +1279,6 @@ int of_property_read_string(struct device_node *np, const char *propname, } EXPORT_SYMBOL_GPL(of_property_read_string); -/** - * of_property_read_string_index - Find and read a string from a multiple - * strings property. - * @np: device node from which the property value is to be read. - * @propname: name of the property to be searched. - * @index: index of the string in the list of strings - * @out_string: pointer to null terminated return string, modified only if - * return value is 0. - * - * Search for a property in a device tree node and retrieve a null - * terminated string value (pointer to data, not a copy) in the list of strings - * contained in that property. - * Returns 0 on success, -EINVAL if the property does not exist, -ENODATA if - * property does not have a value, and -EILSEQ if the string is not - * null-terminated within the length of the property data. - * - * The out_string pointer is modified only if a valid string can be decoded. - */ -int of_property_read_string_index(struct device_node *np, const char *propname, - int index, const char **output) -{ - struct property *prop = of_find_property(np, propname, NULL); - int i = 0; - size_t l = 0, total = 0; - const char *p; - - if (!prop) - return -EINVAL; - if (!prop->value) - return -ENODATA; - if (strnlen(prop->value, prop->length) >= prop->length) - return -EILSEQ; - - p = prop->value; - - for (i = 0; total < prop->length; total += l, p += l) { - l = strlen(p) + 1; - if (i++ == index) { - *output = p; - return 0; - } - } - return -ENODATA; -} -EXPORT_SYMBOL_GPL(of_property_read_string_index); - /** * of_property_match_string() - Find string in a list and return index * @np: pointer to node containing string list property @@ -1351,7 +1305,7 @@ int of_property_match_string(struct device_node *np, const char *propname, end = p + prop->length; for (i = 0; p < end; i++, p += l) { - l = strlen(p) + 1; + l = strnlen(p, end - p) + 1; if (p + l > end) return -EILSEQ; pr_debug("comparing %s with %s\n", string, p); @@ -1363,39 +1317,41 @@ int of_property_match_string(struct device_node *np, const char *propname, EXPORT_SYMBOL_GPL(of_property_match_string); /** - * of_property_count_strings - Find and return the number of strings from a - * multiple strings property. + * of_property_read_string_util() - Utility helper for parsing string properties * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. + * @out_strs: output array of string pointers. + * @sz: number of array elements to read. + * @skip: Number of strings to skip over at beginning of list. * - * Search for a property in a device tree node and retrieve the number of null - * terminated string contain in it. Returns the number of strings on - * success, -EINVAL if the property does not exist, -ENODATA if property - * does not have a value, and -EILSEQ if the string is not null-terminated - * within the length of the property data. + * Don't call this function directly. It is a utility helper for the + * of_property_read_string*() family of functions. */ -int of_property_count_strings(struct device_node *np, const char *propname) +int of_property_read_string_helper(struct device_node *np, const char *propname, + const char **out_strs, size_t sz, int skip) { struct property *prop = of_find_property(np, propname, NULL); - int i = 0; - size_t l = 0, total = 0; - const char *p; + int l = 0, i = 0; + const char *p, *end; if (!prop) return -EINVAL; if (!prop->value) return -ENODATA; - if (strnlen(prop->value, prop->length) >= prop->length) - return -EILSEQ; - p = prop->value; + end = p + prop->length; - for (i = 0; total < prop->length; total += l, p += l, i++) - l = strlen(p) + 1; - - return i; + for (i = 0; p < end && (!out_strs || i < skip + sz); i++, p += l) { + l = strnlen(p, end - p) + 1; + if (p + l > end) + return -EILSEQ; + if (out_strs && i >= skip) + *out_strs++ = p; + } + i -= skip; + return i <= 0 ? -ENODATA : i; } -EXPORT_SYMBOL_GPL(of_property_count_strings); +EXPORT_SYMBOL_GPL(of_property_read_string_helper); void of_print_phandle_args(const char *msg, const struct of_phandle_args *args) { diff --git a/drivers/of/selftest.c b/drivers/of/selftest.c index 78001270a598..11b873c54a77 100644 --- a/drivers/of/selftest.c +++ b/drivers/of/selftest.c @@ -339,8 +339,9 @@ static void __init of_selftest_parse_phandle_with_args(void) selftest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc); } -static void __init of_selftest_property_match_string(void) +static void __init of_selftest_property_string(void) { + const char *strings[4]; struct device_node *np; int rc; @@ -357,13 +358,66 @@ static void __init of_selftest_property_match_string(void) rc = of_property_match_string(np, "phandle-list-names", "third"); selftest(rc == 2, "third expected:0 got:%i\n", rc); rc = of_property_match_string(np, "phandle-list-names", "fourth"); - selftest(rc == -ENODATA, "unmatched string; rc=%i", rc); + selftest(rc == -ENODATA, "unmatched string; rc=%i\n", rc); rc = of_property_match_string(np, "missing-property", "blah"); - selftest(rc == -EINVAL, "missing property; rc=%i", rc); + selftest(rc == -EINVAL, "missing property; rc=%i\n", rc); rc = of_property_match_string(np, "empty-property", "blah"); - selftest(rc == -ENODATA, "empty property; rc=%i", rc); + selftest(rc == -ENODATA, "empty property; rc=%i\n", rc); rc = of_property_match_string(np, "unterminated-string", "blah"); - selftest(rc == -EILSEQ, "unterminated string; rc=%i", rc); + selftest(rc == -EILSEQ, "unterminated string; rc=%i\n", rc); + + /* of_property_count_strings() tests */ + rc = of_property_count_strings(np, "string-property"); + selftest(rc == 1, "Incorrect string count; rc=%i\n", rc); + rc = of_property_count_strings(np, "phandle-list-names"); + selftest(rc == 3, "Incorrect string count; rc=%i\n", rc); + rc = of_property_count_strings(np, "unterminated-string"); + selftest(rc == -EILSEQ, "unterminated string; rc=%i\n", rc); + rc = of_property_count_strings(np, "unterminated-string-list"); + selftest(rc == -EILSEQ, "unterminated string array; rc=%i\n", rc); + + /* of_property_read_string_index() tests */ + rc = of_property_read_string_index(np, "string-property", 0, strings); + selftest(rc == 0 && !strcmp(strings[0], "foobar"), "of_property_read_string_index() failure; rc=%i\n", rc); + strings[0] = NULL; + rc = of_property_read_string_index(np, "string-property", 1, strings); + selftest(rc == -ENODATA && strings[0] == NULL, "of_property_read_string_index() failure; rc=%i\n", rc); + rc = of_property_read_string_index(np, "phandle-list-names", 0, strings); + selftest(rc == 0 && !strcmp(strings[0], "first"), "of_property_read_string_index() failure; rc=%i\n", rc); + rc = of_property_read_string_index(np, "phandle-list-names", 1, strings); + selftest(rc == 0 && !strcmp(strings[0], "second"), "of_property_read_string_index() failure; rc=%i\n", rc); + rc = of_property_read_string_index(np, "phandle-list-names", 2, strings); + selftest(rc == 0 && !strcmp(strings[0], "third"), "of_property_read_string_index() failure; rc=%i\n", rc); + strings[0] = NULL; + rc = of_property_read_string_index(np, "phandle-list-names", 3, strings); + selftest(rc == -ENODATA && strings[0] == NULL, "of_property_read_string_index() failure; rc=%i\n", rc); + strings[0] = NULL; + rc = of_property_read_string_index(np, "unterminated-string", 0, strings); + selftest(rc == -EILSEQ && strings[0] == NULL, "of_property_read_string_index() failure; rc=%i\n", rc); + rc = of_property_read_string_index(np, "unterminated-string-list", 0, strings); + selftest(rc == 0 && !strcmp(strings[0], "first"), "of_property_read_string_index() failure; rc=%i\n", rc); + strings[0] = NULL; + rc = of_property_read_string_index(np, "unterminated-string-list", 2, strings); /* should fail */ + selftest(rc == -EILSEQ && strings[0] == NULL, "of_property_read_string_index() failure; rc=%i\n", rc); + strings[1] = NULL; + + /* of_property_read_string_array() tests */ + rc = of_property_read_string_array(np, "string-property", strings, 4); + selftest(rc == 1, "Incorrect string count; rc=%i\n", rc); + rc = of_property_read_string_array(np, "phandle-list-names", strings, 4); + selftest(rc == 3, "Incorrect string count; rc=%i\n", rc); + rc = of_property_read_string_array(np, "unterminated-string", strings, 4); + selftest(rc == -EILSEQ, "unterminated string; rc=%i\n", rc); + /* -- An incorrectly formed string should cause a failure */ + rc = of_property_read_string_array(np, "unterminated-string-list", strings, 4); + selftest(rc == -EILSEQ, "unterminated string array; rc=%i\n", rc); + /* -- parsing the correctly formed strings should still work: */ + strings[2] = NULL; + rc = of_property_read_string_array(np, "unterminated-string-list", strings, 2); + selftest(rc == 2 && strings[2] == NULL, "of_property_read_string_array() failure; rc=%i\n", rc); + strings[1] = NULL; + rc = of_property_read_string_array(np, "phandle-list-names", strings, 1); + selftest(rc == 1 && strings[1] == NULL, "Overwrote end of string array; rc=%i, str='%s'\n", rc, strings[1]); } #define propcmp(p1, p2) (((p1)->length == (p2)->length) && \ @@ -881,7 +935,7 @@ static int __init of_selftest(void) of_selftest_find_node_by_name(); of_selftest_dynamic(); of_selftest_parse_phandle_with_args(); - of_selftest_property_match_string(); + of_selftest_property_string(); of_selftest_property_copy(); of_selftest_changeset(); of_selftest_parse_interrupts(); diff --git a/drivers/of/testcase-data/tests-phandle.dtsi b/drivers/of/testcase-data/tests-phandle.dtsi index ce0fe083d406..5b1527e8a7fb 100644 --- a/drivers/of/testcase-data/tests-phandle.dtsi +++ b/drivers/of/testcase-data/tests-phandle.dtsi @@ -39,7 +39,9 @@ phandle-list-bad-args = <&provider2 1 0>, <&provider3 0>; empty-property; + string-property = "foobar"; unterminated-string = [40 41 42 43]; + unterminated-string-list = "first", "second", [40 41 42 43]; }; }; }; diff --git a/include/linux/of.h b/include/linux/of.h index 6545e7aec7bb..29f0adc5f3e4 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -267,14 +267,12 @@ extern int of_property_read_u64(const struct device_node *np, extern int of_property_read_string(struct device_node *np, const char *propname, const char **out_string); -extern int of_property_read_string_index(struct device_node *np, - const char *propname, - int index, const char **output); extern int of_property_match_string(struct device_node *np, const char *propname, const char *string); -extern int of_property_count_strings(struct device_node *np, - const char *propname); +extern int of_property_read_string_helper(struct device_node *np, + const char *propname, + const char **out_strs, size_t sz, int index); extern int of_device_is_compatible(const struct device_node *device, const char *); extern int of_device_is_available(const struct device_node *device); @@ -486,15 +484,9 @@ static inline int of_property_read_string(struct device_node *np, return -ENOSYS; } -static inline int of_property_read_string_index(struct device_node *np, - const char *propname, int index, - const char **out_string) -{ - return -ENOSYS; -} - -static inline int of_property_count_strings(struct device_node *np, - const char *propname) +static inline int of_property_read_string_helper(struct device_node *np, + const char *propname, + const char **out_strs, size_t sz, int index) { return -ENOSYS; } @@ -667,6 +659,70 @@ static inline int of_property_count_u64_elems(const struct device_node *np, return of_property_count_elems_of_size(np, propname, sizeof(u64)); } +/** + * of_property_read_string_array() - Read an array of strings from a multiple + * strings property. + * @np: device node from which the property value is to be read. + * @propname: name of the property to be searched. + * @out_strs: output array of string pointers. + * @sz: number of array elements to read. + * + * Search for a property in a device tree node and retrieve a list of + * terminated string values (pointer to data, not a copy) in that property. + * + * If @out_strs is NULL, the number of strings in the property is returned. + */ +static inline int of_property_read_string_array(struct device_node *np, + const char *propname, const char **out_strs, + size_t sz) +{ + return of_property_read_string_helper(np, propname, out_strs, sz, 0); +} + +/** + * of_property_count_strings() - Find and return the number of strings from a + * multiple strings property. + * @np: device node from which the property value is to be read. + * @propname: name of the property to be searched. + * + * Search for a property in a device tree node and retrieve the number of null + * terminated string contain in it. Returns the number of strings on + * success, -EINVAL if the property does not exist, -ENODATA if property + * does not have a value, and -EILSEQ if the string is not null-terminated + * within the length of the property data. + */ +static inline int of_property_count_strings(struct device_node *np, + const char *propname) +{ + return of_property_read_string_helper(np, propname, NULL, 0, 0); +} + +/** + * of_property_read_string_index() - Find and read a string from a multiple + * strings property. + * @np: device node from which the property value is to be read. + * @propname: name of the property to be searched. + * @index: index of the string in the list of strings + * @out_string: pointer to null terminated return string, modified only if + * return value is 0. + * + * Search for a property in a device tree node and retrieve a null + * terminated string value (pointer to data, not a copy) in the list of strings + * contained in that property. + * Returns 0 on success, -EINVAL if the property does not exist, -ENODATA if + * property does not have a value, and -EILSEQ if the string is not + * null-terminated within the length of the property data. + * + * The out_string pointer is modified only if a valid string can be decoded. + */ +static inline int of_property_read_string_index(struct device_node *np, + const char *propname, + int index, const char **output) +{ + int rc = of_property_read_string_helper(np, propname, output, 1, index); + return rc < 0 ? rc : 0; +} + /** * of_property_read_bool - Findfrom a property * @np: device node from which the property value is to be read. -- cgit v1.2.3 From 32f638fc11db0526c706454d9ab4339d55ac89f3 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 30 Oct 2014 10:17:25 -0600 Subject: PCI: Don't oops on virtual buses in acpi_pci_get_bridge_handle() acpi_pci_get_bridge_handle() returns the ACPI handle for the bridge device (either a host bridge or a PCI-to-PCI bridge) leading to a PCI bus. But SR-IOV virtual functions can be on a virtual bus with no bridge leading to it. Return a NULL acpi_handle in this case instead of trying to dereference the NULL pointer to the bridge. This fixes a NULL pointer dereference oops in pci_get_hp_params() when adding SR-IOV VF devices on virtual buses. [bhelgaas: changelog, add comment in code] Fixes: 6cd33649fa83 ("PCI: Add pci_configure_device() during enumeration") Link: https://bugzilla.kernel.org/show_bug.cgi?id=87591 Reported-by: Chao Zhou Reported-by: Joerg Roedel Signed-off-by: Yinghai Lu Signed-off-by: Bjorn Helgaas --- include/linux/pci-acpi.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index 64dacb7288a6..24c7728ca681 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -41,8 +41,13 @@ static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus) if (pci_is_root_bus(pbus)) dev = pbus->bridge; - else + else { + /* If pbus is a virtual bus, there is no bridge to it */ + if (!pbus->self) + return NULL; + dev = &pbus->self->dev; + } return ACPI_HANDLE(dev); } -- cgit v1.2.3 From c0acb8144bd6d8d88aee1dab33364b7353e9a903 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 10 Oct 2014 12:48:35 +0200 Subject: mfd: max77693: Fix always masked MUIC interrupts All interrupts coming from MUIC were ignored because interrupt source register was masked. The Maxim 77693 has a "interrupt source" - a separate register and interrupts which give information about PMIC block triggering the individual interrupt (charger, topsys, MUIC, flash LED). By default bootloader could initialize this register to "mask all" value. In such case (observed on Trats2 board) MUIC interrupts won't be generated regardless of their mask status. Regmap irq chip was unmasking individual MUIC interrupts but the source was masked Before introducing regmap irq chip this interrupt source was unmasked, read and acked. Reading and acking is not necessary but unmasking is. Fixes: 342d669c1ee4 ("mfd: max77693: Handle IRQs using regmap") Cc: Signed-off-by: Krzysztof Kozlowski Reviewed-by: Chanwoo Choi Signed-off-by: Lee Jones --- drivers/mfd/max77693.c | 12 ++++++++++++ include/linux/mfd/max77693-private.h | 7 +++++++ 2 files changed, 19 insertions(+) (limited to 'include/linux') diff --git a/drivers/mfd/max77693.c b/drivers/mfd/max77693.c index 22d23fcbb65c..711773e8e64b 100644 --- a/drivers/mfd/max77693.c +++ b/drivers/mfd/max77693.c @@ -250,6 +250,17 @@ static int max77693_i2c_probe(struct i2c_client *i2c, goto err_irq_muic; } + /* Unmask interrupts from all blocks in interrupt source register */ + ret = regmap_update_bits(max77693->regmap, + MAX77693_PMIC_REG_INTSRC_MASK, + SRC_IRQ_ALL, (unsigned int)~SRC_IRQ_ALL); + if (ret < 0) { + dev_err(max77693->dev, + "Could not unmask interrupts in INTSRC: %d\n", + ret); + goto err_intsrc; + } + pm_runtime_set_active(max77693->dev); ret = mfd_add_devices(max77693->dev, -1, max77693_devs, @@ -261,6 +272,7 @@ static int max77693_i2c_probe(struct i2c_client *i2c, err_mfd: mfd_remove_devices(max77693->dev); +err_intsrc: regmap_del_irq_chip(max77693->irq, max77693->irq_data_muic); err_irq_muic: regmap_del_irq_chip(max77693->irq, max77693->irq_data_charger); diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h index fc17d56581b2..582e67f34054 100644 --- a/include/linux/mfd/max77693-private.h +++ b/include/linux/mfd/max77693-private.h @@ -330,6 +330,13 @@ enum max77693_irq_source { MAX77693_IRQ_GROUP_NR, }; +#define SRC_IRQ_CHARGER BIT(0) +#define SRC_IRQ_TOP BIT(1) +#define SRC_IRQ_FLASH BIT(2) +#define SRC_IRQ_MUIC BIT(3) +#define SRC_IRQ_ALL (SRC_IRQ_CHARGER | SRC_IRQ_TOP \ + | SRC_IRQ_FLASH | SRC_IRQ_MUIC) + #define LED_IRQ_FLED2_OPEN BIT(0) #define LED_IRQ_FLED2_SHORT BIT(1) #define LED_IRQ_FLED1_OPEN BIT(2) -- cgit v1.2.3 From e30f53aad2202b5526c40c36d8eeac8bf290bde5 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Mon, 10 Nov 2014 19:46:34 +0100 Subject: tracing: Do not busy wait in buffer splice On a !PREEMPT kernel, attempting to use trace-cmd results in a soft lockup: # trace-cmd record -e raw_syscalls:* -F false NMI watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [trace-cmd:61] ... Call Trace: [] ? __wake_up_common+0x90/0x90 [] wait_on_pipe+0x35/0x40 [] tracing_buffers_splice_read+0x2e3/0x3c0 [] ? tracing_stats_read+0x2a0/0x2a0 [] ? _raw_spin_unlock+0x2b/0x40 [] ? do_read_fault+0x21b/0x290 [] ? handle_mm_fault+0x2ba/0xbd0 [] ? trace_event_buffer_lock_reserve+0x40/0x80 [] ? trace_buffer_lock_reserve+0x22/0x60 [] ? trace_event_buffer_lock_reserve+0x40/0x80 [] do_splice_to+0x6d/0x90 [] SyS_splice+0x7c1/0x800 [] tracesys_phase2+0xd3/0xd8 The problem is this: tracing_buffers_splice_read() calls ring_buffer_wait() to wait for data in the ring buffers. The buffers are not empty so ring_buffer_wait() returns immediately. But tracing_buffers_splice_read() calls ring_buffer_read_page() with full=1, meaning it only wants to read a full page. When the full page is not available, tracing_buffers_splice_read() tries to wait again with ring_buffer_wait(), which again returns immediately, and so on. Fix this by adding a "full" argument to ring_buffer_wait() which will make ring_buffer_wait() wait until the writer has left the reader's page, i.e. until full-page reads will succeed. Link: http://lkml.kernel.org/r/1415645194-25379-1-git-send-email-rabin@rab.in Cc: stable@vger.kernel.org # 3.16+ Fixes: b1169cc69ba9 ("tracing: Remove mock up poll wait function") Signed-off-by: Rabin Vincent Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 2 +- kernel/trace/ring_buffer.c | 81 ++++++++++++++++++++++++++++++--------------- kernel/trace/trace.c | 23 ++++--------- 3 files changed, 62 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 49a4d6f59108..e2c13cd863bd 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -97,7 +97,7 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k __ring_buffer_alloc((size), (flags), &__key); \ }) -int ring_buffer_wait(struct ring_buffer *buffer, int cpu); +int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full); int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu, struct file *filp, poll_table *poll_table); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 2d75c94ae87d..a56e07c8d15b 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -538,16 +538,18 @@ static void rb_wake_up_waiters(struct irq_work *work) * ring_buffer_wait - wait for input to the ring buffer * @buffer: buffer to wait on * @cpu: the cpu buffer to wait on + * @full: wait until a full page is available, if @cpu != RING_BUFFER_ALL_CPUS * * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon * as data is added to any of the @buffer's cpu buffers. Otherwise * it will wait for data to be added to a specific cpu buffer. */ -int ring_buffer_wait(struct ring_buffer *buffer, int cpu) +int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full) { - struct ring_buffer_per_cpu *cpu_buffer; + struct ring_buffer_per_cpu *uninitialized_var(cpu_buffer); DEFINE_WAIT(wait); struct rb_irq_work *work; + int ret = 0; /* * Depending on what the caller is waiting for, either any @@ -564,36 +566,61 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu) } - prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE); + while (true) { + prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE); - /* - * The events can happen in critical sections where - * checking a work queue can cause deadlocks. - * After adding a task to the queue, this flag is set - * only to notify events to try to wake up the queue - * using irq_work. - * - * We don't clear it even if the buffer is no longer - * empty. The flag only causes the next event to run - * irq_work to do the work queue wake up. The worse - * that can happen if we race with !trace_empty() is that - * an event will cause an irq_work to try to wake up - * an empty queue. - * - * There's no reason to protect this flag either, as - * the work queue and irq_work logic will do the necessary - * synchronization for the wake ups. The only thing - * that is necessary is that the wake up happens after - * a task has been queued. It's OK for spurious wake ups. - */ - work->waiters_pending = true; + /* + * The events can happen in critical sections where + * checking a work queue can cause deadlocks. + * After adding a task to the queue, this flag is set + * only to notify events to try to wake up the queue + * using irq_work. + * + * We don't clear it even if the buffer is no longer + * empty. The flag only causes the next event to run + * irq_work to do the work queue wake up. The worse + * that can happen if we race with !trace_empty() is that + * an event will cause an irq_work to try to wake up + * an empty queue. + * + * There's no reason to protect this flag either, as + * the work queue and irq_work logic will do the necessary + * synchronization for the wake ups. The only thing + * that is necessary is that the wake up happens after + * a task has been queued. It's OK for spurious wake ups. + */ + work->waiters_pending = true; + + if (signal_pending(current)) { + ret = -EINTR; + break; + } + + if (cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) + break; + + if (cpu != RING_BUFFER_ALL_CPUS && + !ring_buffer_empty_cpu(buffer, cpu)) { + unsigned long flags; + bool pagebusy; + + if (!full) + break; + + raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page; + raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + + if (!pagebusy) + break; + } - if ((cpu == RING_BUFFER_ALL_CPUS && ring_buffer_empty(buffer)) || - (cpu != RING_BUFFER_ALL_CPUS && ring_buffer_empty_cpu(buffer, cpu))) schedule(); + } finish_wait(&work->waiters, &wait); - return 0; + + return ret; } /** diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8a528392b1f4..15209335888d 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1076,13 +1076,14 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) } #endif /* CONFIG_TRACER_MAX_TRACE */ -static int wait_on_pipe(struct trace_iterator *iter) +static int wait_on_pipe(struct trace_iterator *iter, bool full) { /* Iterators are static, they should be filled or empty */ if (trace_buffer_iter(iter, iter->cpu_file)) return 0; - return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file); + return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file, + full); } #ifdef CONFIG_FTRACE_STARTUP_TEST @@ -4434,15 +4435,12 @@ static int tracing_wait_pipe(struct file *filp) mutex_unlock(&iter->mutex); - ret = wait_on_pipe(iter); + ret = wait_on_pipe(iter, false); mutex_lock(&iter->mutex); if (ret) return ret; - - if (signal_pending(current)) - return -EINTR; } return 1; @@ -5372,16 +5370,12 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, goto out_unlock; } mutex_unlock(&trace_types_lock); - ret = wait_on_pipe(iter); + ret = wait_on_pipe(iter, false); mutex_lock(&trace_types_lock); if (ret) { size = ret; goto out_unlock; } - if (signal_pending(current)) { - size = -EINTR; - goto out_unlock; - } goto again; } size = 0; @@ -5587,14 +5581,11 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, goto out; } mutex_unlock(&trace_types_lock); - ret = wait_on_pipe(iter); + ret = wait_on_pipe(iter, true); mutex_lock(&trace_types_lock); if (ret) goto out; - if (signal_pending(current)) { - ret = -EINTR; - goto out; - } + goto again; } -- cgit v1.2.3 From ad53f92eb416d81e469fa8ea57153e59455e7175 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Thu, 13 Nov 2014 15:19:11 -0800 Subject: mm/page_alloc: fix incorrect isolation behavior by rechecking migratetype Before describing bugs itself, I first explain definition of freepage. 1. pages on buddy list are counted as freepage. 2. pages on isolate migratetype buddy list are *not* counted as freepage. 3. pages on cma buddy list are counted as CMA freepage, too. Now, I describe problems and related patch. Patch 1: There is race conditions on getting pageblock migratetype that it results in misplacement of freepages on buddy list, incorrect freepage count and un-availability of freepage. Patch 2: Freepages on pcp list could have stale cached information to determine migratetype of buddy list to go. This causes misplacement of freepages on buddy list and incorrect freepage count. Patch 4: Merging between freepages on different migratetype of pageblocks will cause freepages accouting problem. This patch fixes it. Without patchset [3], above problem doesn't happens on my CMA allocation test, because CMA reserved pages aren't used at all. So there is no chance for above race. With patchset [3], I did simple CMA allocation test and get below result: - Virtual machine, 4 cpus, 1024 MB memory, 256 MB CMA reservation - run kernel build (make -j16) on background - 30 times CMA allocation(8MB * 30 = 240MB) attempts in 5 sec interval - Result: more than 5000 freepage count are missed With patchset [3] and this patchset, I found that no freepage count are missed so that I conclude that problems are solved. On my simple memory offlining test, these problems also occur on that environment, too. This patch (of 4): There are two paths to reach core free function of buddy allocator, __free_one_page(), one is free_one_page()->__free_one_page() and the other is free_hot_cold_page()->free_pcppages_bulk()->__free_one_page(). Each paths has race condition causing serious problems. At first, this patch is focused on first type of freepath. And then, following patch will solve the problem in second type of freepath. In the first type of freepath, we got migratetype of freeing page without holding the zone lock, so it could be racy. There are two cases of this race. 1. pages are added to isolate buddy list after restoring orignal migratetype CPU1 CPU2 get migratetype => return MIGRATE_ISOLATE call free_one_page() with MIGRATE_ISOLATE grab the zone lock unisolate pageblock release the zone lock grab the zone lock call __free_one_page() with MIGRATE_ISOLATE freepage go into isolate buddy list, although pageblock is already unisolated This may cause two problems. One is that we can't use this page anymore until next isolation attempt of this pageblock, because freepage is on isolate buddy list. The other is that freepage accouting could be wrong due to merging between different buddy list. Freepages on isolate buddy list aren't counted as freepage, but ones on normal buddy list are counted as freepage. If merge happens, buddy freepage on normal buddy list is inevitably moved to isolate buddy list without any consideration of freepage accouting so it could be incorrect. 2. pages are added to normal buddy list while pageblock is isolated. It is similar with above case. This also may cause two problems. One is that we can't keep these freepages from being allocated. Although this pageblock is isolated, freepage would be added to normal buddy list so that it could be allocated without any restriction. And the other problem is same as case 1, that it, incorrect freepage accouting. This race condition would be prevented by checking migratetype again with holding the zone lock. Because it is somewhat heavy operation and it isn't needed in common case, we want to avoid rechecking as much as possible. So this patch introduce new variable, nr_isolate_pageblock in struct zone to check if there is isolated pageblock. With this, we can avoid to re-check migratetype in common case and do it only if there is isolated pageblock or migratetype is MIGRATE_ISOLATE. This solve above mentioned problems. Changes from v3: Add one more check in free_one_page() that checks whether migratetype is MIGRATE_ISOLATE or not. Without this, abovementioned case 1 could happens. Signed-off-by: Joonsoo Kim Acked-by: Minchan Kim Acked-by: Michal Nazarewicz Acked-by: Vlastimil Babka Cc: "Kirill A. Shutemov" Cc: Mel Gorman Cc: Johannes Weiner Cc: Yasuaki Ishimatsu Cc: Zhang Yanfei Cc: Tang Chen Cc: Naoya Horiguchi Cc: Bartlomiej Zolnierkiewicz Cc: Wen Congyang Cc: Marek Szyprowski Cc: Laura Abbott Cc: Heesub Shin Cc: "Aneesh Kumar K.V" Cc: Ritesh Harjani Cc: Gioh Kim Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 9 +++++++++ include/linux/page-isolation.h | 8 ++++++++ mm/page_alloc.c | 11 +++++++++-- mm/page_isolation.c | 2 ++ 4 files changed, 28 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 48bf12ef6620..ffe66e381c04 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -431,6 +431,15 @@ struct zone { */ int nr_migrate_reserve_block; +#ifdef CONFIG_MEMORY_ISOLATION + /* + * Number of isolated pageblock. It is used to solve incorrect + * freepage counting problem due to racy retrieving migratetype + * of pageblock. Protected by zone->lock. + */ + unsigned long nr_isolate_pageblock; +#endif + #ifdef CONFIG_MEMORY_HOTPLUG /* see spanned/present_pages for more description */ seqlock_t span_seqlock; diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index 3fff8e774067..2dc1e1697b45 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -2,6 +2,10 @@ #define __LINUX_PAGEISOLATION_H #ifdef CONFIG_MEMORY_ISOLATION +static inline bool has_isolate_pageblock(struct zone *zone) +{ + return zone->nr_isolate_pageblock; +} static inline bool is_migrate_isolate_page(struct page *page) { return get_pageblock_migratetype(page) == MIGRATE_ISOLATE; @@ -11,6 +15,10 @@ static inline bool is_migrate_isolate(int migratetype) return migratetype == MIGRATE_ISOLATE; } #else +static inline bool has_isolate_pageblock(struct zone *zone) +{ + return false; +} static inline bool is_migrate_isolate_page(struct page *page) { return false; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9cd36b822444..df1da25b309b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -739,9 +739,16 @@ static void free_one_page(struct zone *zone, if (nr_scanned) __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned); + if (unlikely(has_isolate_pageblock(zone) || + is_migrate_isolate(migratetype))) { + migratetype = get_pfnblock_migratetype(page, pfn); + if (is_migrate_isolate(migratetype)) + goto skip_counting; + } + __mod_zone_freepage_state(zone, 1 << order, migratetype); + +skip_counting: __free_one_page(page, pfn, zone, order, migratetype); - if (unlikely(!is_migrate_isolate(migratetype))) - __mod_zone_freepage_state(zone, 1 << order, migratetype); spin_unlock(&zone->lock); } diff --git a/mm/page_isolation.c b/mm/page_isolation.c index d1473b2e9481..1fa4a4d72ecc 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -60,6 +60,7 @@ out: int migratetype = get_pageblock_migratetype(page); set_pageblock_migratetype(page, MIGRATE_ISOLATE); + zone->nr_isolate_pageblock++; nr_pages = move_freepages_block(zone, page, MIGRATE_ISOLATE); __mod_zone_freepage_state(zone, -nr_pages, migratetype); @@ -83,6 +84,7 @@ void unset_migratetype_isolate(struct page *page, unsigned migratetype) nr_pages = move_freepages_block(zone, page, migratetype); __mod_zone_freepage_state(zone, nr_pages, migratetype); set_pageblock_migratetype(page, migratetype); + zone->nr_isolate_pageblock--; out: spin_unlock_irqrestore(&zone->lock, flags); } -- cgit v1.2.3 From f784a3f19613901ca4539a5b0eed3bdc700e6ee7 Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Thu, 13 Nov 2014 15:19:39 -0800 Subject: mem-hotplug: reset node managed pages when hot-adding a new pgdat In free_area_init_core(), zone->managed_pages is set to an approximate value for lowmem, and will be adjusted when the bootmem allocator frees pages into the buddy system. But free_area_init_core() is also called by hotadd_new_pgdat() when hot-adding memory. As a result, zone->managed_pages of the newly added node's pgdat is set to an approximate value in the very beginning. Even if the memory on that node has node been onlined, /sys/device/system/node/nodeXXX/meminfo has wrong value: hot-add node2 (memory not onlined) cat /sys/device/system/node/node2/meminfo Node 2 MemTotal: 33554432 kB Node 2 MemFree: 0 kB Node 2 MemUsed: 33554432 kB Node 2 Active: 0 kB This patch fixes this problem by reset node managed pages to 0 after hot-adding a new node. 1. Move reset_managed_pages_done from reset_node_managed_pages() to reset_all_zones_managed_pages() 2. Make reset_node_managed_pages() non-static 3. Call reset_node_managed_pages() in hotadd_new_pgdat() after pgdat is initialized Signed-off-by: Tang Chen Signed-off-by: Yasuaki Ishimatsu Cc: [3.16+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bootmem.h | 1 + mm/bootmem.c | 9 +++++---- mm/memory_hotplug.c | 9 +++++++++ mm/nobootmem.c | 8 +++++--- 4 files changed, 20 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 4e2bd4c95b66..0995c2de8162 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -46,6 +46,7 @@ extern unsigned long init_bootmem_node(pg_data_t *pgdat, extern unsigned long init_bootmem(unsigned long addr, unsigned long memend); extern unsigned long free_all_bootmem(void); +extern void reset_node_managed_pages(pg_data_t *pgdat); extern void reset_all_zones_managed_pages(void); extern void free_bootmem_node(pg_data_t *pgdat, diff --git a/mm/bootmem.c b/mm/bootmem.c index 8a000cebb0d7..477be696511d 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -243,13 +243,10 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) static int reset_managed_pages_done __initdata; -static inline void __init reset_node_managed_pages(pg_data_t *pgdat) +void reset_node_managed_pages(pg_data_t *pgdat) { struct zone *z; - if (reset_managed_pages_done) - return; - for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++) z->managed_pages = 0; } @@ -258,8 +255,12 @@ void __init reset_all_zones_managed_pages(void) { struct pglist_data *pgdat; + if (reset_managed_pages_done) + return; + for_each_online_pgdat(pgdat) reset_node_managed_pages(pgdat); + reset_managed_pages_done = 1; } diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 252e1dbbed86..c5f7fe199a60 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -31,6 +31,7 @@ #include #include #include +#include #include @@ -1096,6 +1097,14 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start) build_all_zonelists(pgdat, NULL); mutex_unlock(&zonelists_mutex); + /* + * zone->managed_pages is set to an approximate value in + * free_area_init_core(), which will cause + * /sys/device/system/node/nodeX/meminfo has wrong data. + * So reset it to 0 before any memory is onlined. + */ + reset_node_managed_pages(pgdat); + return pgdat; } diff --git a/mm/nobootmem.c b/mm/nobootmem.c index 7c7ab32ee503..90b50468333e 100644 --- a/mm/nobootmem.c +++ b/mm/nobootmem.c @@ -145,12 +145,10 @@ static unsigned long __init free_low_memory_core_early(void) static int reset_managed_pages_done __initdata; -static inline void __init reset_node_managed_pages(pg_data_t *pgdat) +void reset_node_managed_pages(pg_data_t *pgdat) { struct zone *z; - if (reset_managed_pages_done) - return; for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++) z->managed_pages = 0; } @@ -159,8 +157,12 @@ void __init reset_all_zones_managed_pages(void) { struct pglist_data *pgdat; + if (reset_managed_pages_done) + return; + for_each_online_pgdat(pgdat) reset_node_managed_pages(pgdat); + reset_managed_pages_done = 1; } -- cgit v1.2.3