From d3091298570006fa538ec9beacbfb1098964962e Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Fri, 16 May 2014 23:26:05 +0200 Subject: sparc: fix sparse warnings in smp_32.c + smp_64.c Fix following warnings: smp_32.c:177:5: warning: symbol 'setup_profiling_timer' was not declared. Should it be static? smp_64.c:1202:5: warning: symbol 'setup_profiling_timer' was not declared. Should it be static? smp_64.c:989:6: warning: symbol 'kgdb_roundup_cpus' was not declared. Should it be static? Add prototype to include/linux/profile.h of setup_profiling_timer Add missing include to smp_64.c Signed-off-by: Sam Ravnborg Signed-off-by: David S. Miller --- include/linux/profile.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/profile.h b/include/linux/profile.h index aaad3861beb8..b537a25ffa17 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -44,6 +44,7 @@ extern int prof_on __read_mostly; int profile_init(void); int profile_setup(char *str); void profile_tick(int type); +int setup_profiling_timer(unsigned int multiplier); /* * Add multiple profiler hits to a given address: -- cgit v1.2.3 From b14903e10a06347234b387f7364f86aa07252d9f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 4 Jun 2014 16:46:00 +0200 Subject: regulator: add regulator_can_change_voltage stub When CONFIG_REGULATOR is not set, we cannot call regulator_can_change_voltage() from a device driver, which results in a build error like video/fbdev/omap2/dss/hdmi5.c: In function 'hdmi_init_regulator': video/fbdev/omap2/dss/hdmi5.c:149:2: error: implicit declaration of function 'regulator_can_change_voltage' [-Werror=implicit-function-declaration] even for drivers that don't require the regulator API normally. Such a use was recently added in the omap2+ hdmi driver. This avoids the problem by adding a static inline function stub in the API header, as we have for most of the other regulator functions as well. Signed-off-by: Arnd Bergmann Cc: Mark Brown Cc: Tomi Valkeinen Signed-off-by: Mark Brown --- include/linux/regulator/consumer.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index e530681bea70..d60b92a7fc25 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -391,6 +391,11 @@ static inline void regulator_bulk_free(int num_consumers, { } +static inline int regulator_can_change_voltage(struct regulator *regulator) +{ + return 0; +} + static inline int regulator_set_voltage(struct regulator *regulator, int min_uV, int max_uV) { -- cgit v1.2.3 From 962bd40bc30e412828e091bfda041b7547e779c8 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 10 Jun 2014 12:24:40 -0400 Subject: locks: add missing memory barrier in break_deleg break_deleg is subject to the same potential race as break_lease. Add a memory barrier to prevent it. Signed-off-by: Jeff Layton --- include/linux/fs.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index c3f46e499dd0..22ae79650b82 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1914,6 +1914,12 @@ static inline int break_lease(struct inode *inode, unsigned int mode) static inline int break_deleg(struct inode *inode, unsigned int mode) { + /* + * Since this check is lockless, we must ensure that any refcounts + * taken are done before checking inode->i_flock. Otherwise, we could + * end up racing with tasks trying to set a new lease on this file. + */ + smp_mb(); if (inode->i_flock) return __break_lease(inode, mode, FL_DELEG); return 0; -- cgit v1.2.3 From 2940474af79744411da0cb63b041ad52c57bc443 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 11 Jun 2014 13:49:23 +0200 Subject: block: remove elv_abort_queue and blk_abort_flushes elv_abort_queue has no callers, and blk_abort_flushes is only called by elv_abort_queue. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-flush.c | 38 -------------------------------------- block/blk.h | 1 - block/elevator.c | 20 -------------------- include/linux/elevator.h | 1 - 4 files changed, 60 deletions(-) (limited to 'include/linux') diff --git a/block/blk-flush.c b/block/blk-flush.c index 8ffee4b5f93d..3cb5e9e7108a 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -421,44 +421,6 @@ void blk_insert_flush(struct request *rq) blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0); } -/** - * blk_abort_flushes - @q is being aborted, abort flush requests - * @q: request_queue being aborted - * - * To be called from elv_abort_queue(). @q is being aborted. Prepare all - * FLUSH/FUA requests for abortion. - * - * CONTEXT: - * spin_lock_irq(q->queue_lock) - */ -void blk_abort_flushes(struct request_queue *q) -{ - struct request *rq, *n; - int i; - - /* - * Requests in flight for data are already owned by the dispatch - * queue or the device driver. Just restore for normal completion. - */ - list_for_each_entry_safe(rq, n, &q->flush_data_in_flight, flush.list) { - list_del_init(&rq->flush.list); - blk_flush_restore_request(rq); - } - - /* - * We need to give away requests on flush queues. Restore for - * normal completion and put them on the dispatch queue. - */ - for (i = 0; i < ARRAY_SIZE(q->flush_queue); i++) { - list_for_each_entry_safe(rq, n, &q->flush_queue[i], - flush.list) { - list_del_init(&rq->flush.list); - blk_flush_restore_request(rq); - list_add_tail(&rq->queuelist, &q->queue_head); - } - } -} - /** * blkdev_issue_flush - queue a flush * @bdev: blockdev to issue flush for diff --git a/block/blk.h b/block/blk.h index 45385e9abf6f..6748c4f8d7a1 100644 --- a/block/blk.h +++ b/block/blk.h @@ -84,7 +84,6 @@ static inline void blk_clear_rq_complete(struct request *rq) #define ELV_ON_HASH(rq) ((rq)->cmd_flags & REQ_HASHED) void blk_insert_flush(struct request *rq); -void blk_abort_flushes(struct request_queue *q); static inline struct request *__elv_next_request(struct request_queue *q) { diff --git a/block/elevator.c b/block/elevator.c index f35edddfe9b5..34bded18910e 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -729,26 +729,6 @@ int elv_may_queue(struct request_queue *q, int rw) return ELV_MQUEUE_MAY; } -void elv_abort_queue(struct request_queue *q) -{ - struct request *rq; - - blk_abort_flushes(q); - - while (!list_empty(&q->queue_head)) { - rq = list_entry_rq(q->queue_head.next); - rq->cmd_flags |= REQ_QUIET; - trace_block_rq_abort(q, rq); - /* - * Mark this request as started so we don't trigger - * any debug logic in the end I/O path. - */ - blk_start_request(rq); - __blk_end_request_all(rq, -EIO); - } -} -EXPORT_SYMBOL(elv_abort_queue); - void elv_completed_request(struct request_queue *q, struct request *rq) { struct elevator_queue *e = q->elevator; diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 4ff262e2bf37..e2a6bd7fb133 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -133,7 +133,6 @@ extern struct request *elv_latter_request(struct request_queue *, struct request extern int elv_register_queue(struct request_queue *q); extern void elv_unregister_queue(struct request_queue *q); extern int elv_may_queue(struct request_queue *, int); -extern void elv_abort_queue(struct request_queue *); extern void elv_completed_request(struct request_queue *, struct request *); extern int elv_set_request(struct request_queue *q, struct request *rq, struct bio *bio, gfp_t gfp_mask); -- cgit v1.2.3 From a6e15a39048ec3229b9a53425f4384f55f6cc1b3 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 13 Jun 2014 13:30:35 -0700 Subject: PM / hibernate: introduce "nohibernate" boot parameter To support using kernel features that are not compatible with hibernation, this creates the "nohibernate" kernel boot parameter to disable both hibernation and resume. This allows hibernation support to be a boot-time choice instead of only a compile-time choice. Signed-off-by: Kees Cook Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- Documentation/kernel-parameters.txt | 3 +++ include/linux/suspend.h | 2 ++ kernel/power/hibernate.c | 31 ++++++++++++++++++++++++++++++- kernel/power/main.c | 6 ++---- kernel/power/user.c | 3 +++ 5 files changed, 40 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 6eaa9cdb7094..f8f0466b8b1d 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2184,6 +2184,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. in certain environments such as networked servers or real-time systems. + nohibernate [HIBERNATION] Disable hibernation and resume. + nohz= [KNL] Boottime enable/disable dynamic ticks Valid arguments: on, off Default: on @@ -2980,6 +2982,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. noresume Don't check if there's a hibernation image present during boot. nocompress Don't compress/decompress hibernation images. + no Disable hibernation and resume. retain_initrd [RAM] Keep initrd memory after extraction diff --git a/include/linux/suspend.h b/include/linux/suspend.h index f76994b9396c..519064e0c943 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -327,6 +327,7 @@ extern unsigned long get_safe_page(gfp_t gfp_mask); extern void hibernation_set_ops(const struct platform_hibernation_ops *ops); extern int hibernate(void); extern bool system_entering_hibernation(void); +extern bool hibernation_available(void); asmlinkage int swsusp_save(void); extern struct pbe *restore_pblist; #else /* CONFIG_HIBERNATION */ @@ -339,6 +340,7 @@ static inline void swsusp_unset_page_free(struct page *p) {} static inline void hibernation_set_ops(const struct platform_hibernation_ops *ops) {} static inline int hibernate(void) { return -ENOSYS; } static inline bool system_entering_hibernation(void) { return false; } +static inline bool hibernation_available(void) { return false; } #endif /* CONFIG_HIBERNATION */ /* Hibernation and suspend events */ diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 49e0a20fd010..258f492f0347 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -35,6 +35,7 @@ static int nocompress; static int noresume; +static int nohibernate; static int resume_wait; static unsigned int resume_delay; static char resume_file[256] = CONFIG_PM_STD_PARTITION; @@ -62,6 +63,11 @@ bool freezer_test_done; static const struct platform_hibernation_ops *hibernation_ops; +bool hibernation_available(void) +{ + return (nohibernate == 0); +} + /** * hibernation_set_ops - Set the global hibernate operations. * @ops: Hibernation operations to use in subsequent hibernation transitions. @@ -642,6 +648,11 @@ int hibernate(void) { int error; + if (!hibernation_available()) { + pr_debug("PM: Hibernation not available.\n"); + return -EPERM; + } + lock_system_sleep(); /* The snapshot device should not be opened while we're running */ if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { @@ -734,7 +745,7 @@ static int software_resume(void) /* * If the user said "noresume".. bail out early. */ - if (noresume) + if (noresume || !hibernation_available()) return 0; /* @@ -900,6 +911,9 @@ static ssize_t disk_show(struct kobject *kobj, struct kobj_attribute *attr, int i; char *start = buf; + if (!hibernation_available()) + return sprintf(buf, "[disabled]\n"); + for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) { if (!hibernation_modes[i]) continue; @@ -934,6 +948,9 @@ static ssize_t disk_store(struct kobject *kobj, struct kobj_attribute *attr, char *p; int mode = HIBERNATION_INVALID; + if (!hibernation_available()) + return -EPERM; + p = memchr(buf, '\n', n); len = p ? p - buf : n; @@ -1101,6 +1118,10 @@ static int __init hibernate_setup(char *str) noresume = 1; else if (!strncmp(str, "nocompress", 10)) nocompress = 1; + else if (!strncmp(str, "no", 2)) { + noresume = 1; + nohibernate = 1; + } return 1; } @@ -1125,9 +1146,17 @@ static int __init resumedelay_setup(char *str) return 1; } +static int __init nohibernate_setup(char *str) +{ + noresume = 1; + nohibernate = 1; + return 1; +} + __setup("noresume", noresume_setup); __setup("resume_offset=", resume_offset_setup); __setup("resume=", resume_setup); __setup("hibernate=", hibernate_setup); __setup("resumewait", resumewait_setup); __setup("resumedelay=", resumedelay_setup); +__setup("nohibernate", nohibernate_setup); diff --git a/kernel/power/main.c b/kernel/power/main.c index 573410d6647e..8e90f330f139 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -300,13 +300,11 @@ static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr, s += sprintf(s,"%s ", pm_states[i].label); #endif -#ifdef CONFIG_HIBERNATION - s += sprintf(s, "%s\n", "disk"); -#else + if (hibernation_available()) + s += sprintf(s, "disk "); if (s != buf) /* convert the last space to a newline */ *(s-1) = '\n'; -#endif return (s - buf); } diff --git a/kernel/power/user.c b/kernel/power/user.c index 98d357584cd6..526e8911460a 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -49,6 +49,9 @@ static int snapshot_open(struct inode *inode, struct file *filp) struct snapshot_data *data; int error; + if (!hibernation_available()) + return -EPERM; + lock_system_sleep(); if (!atomic_add_unless(&snapshot_device_available, -1, 0)) { -- cgit v1.2.3 From 736ed4de766d4f0e8e6142dd4f9d73ef61835ed9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 17 Jun 2014 22:09:29 -0700 Subject: block: blk_max_size_offset() should check ->max_sectors Commit 762380ad9322 inadvertently changed a check for max_sectors to max_hw_sectors. Revert that part, so we still compare against max_sectors. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 31e11051f1ba..713f8b62b435 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -920,7 +920,7 @@ static inline unsigned int blk_max_size_offset(struct request_queue *q, sector_t offset) { if (!q->limits.chunk_sectors) - return q->limits.max_hw_sectors; + return q->limits.max_sectors; return q->limits.chunk_sectors - (offset & (q->limits.chunk_sectors - 1)); -- cgit v1.2.3 From 8537b12034cf1fd3fab3da2c859d71f76846fae9 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Tue, 17 Jun 2014 22:12:35 -0700 Subject: blk-mq: bitmap tag: fix races on shared ::wake_index fields Fix racy updates of shared blk_mq_bitmap_tags::wake_index and blk_mq_hw_ctx::wake_index fields. Cc: Ming Lei Signed-off-by: Alexander Gordeev Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 32 +++++++++++++++++++++----------- block/blk-mq-tag.h | 2 +- include/linux/blk-mq.h | 2 +- 3 files changed, 23 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 1aab39f71d95..6deb13055490 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -43,9 +43,16 @@ bool blk_mq_has_free_tags(struct blk_mq_tags *tags) return bt_has_free_tags(&tags->bitmap_tags); } -static inline void bt_index_inc(unsigned int *index) +static inline int bt_index_inc(int index) { - *index = (*index + 1) & (BT_WAIT_QUEUES - 1); + return (index + 1) & (BT_WAIT_QUEUES - 1); +} + +static inline void bt_index_atomic_inc(atomic_t *index) +{ + int old = atomic_read(index); + int new = bt_index_inc(old); + atomic_cmpxchg(index, old, new); } /* @@ -69,14 +76,14 @@ static void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags) int i, wake_index; bt = &tags->bitmap_tags; - wake_index = bt->wake_index; + wake_index = atomic_read(&bt->wake_index); for (i = 0; i < BT_WAIT_QUEUES; i++) { struct bt_wait_state *bs = &bt->bs[wake_index]; if (waitqueue_active(&bs->wait)) wake_up(&bs->wait); - bt_index_inc(&wake_index); + wake_index = bt_index_inc(wake_index); } } @@ -212,12 +219,14 @@ static struct bt_wait_state *bt_wait_ptr(struct blk_mq_bitmap_tags *bt, struct blk_mq_hw_ctx *hctx) { struct bt_wait_state *bs; + int wait_index; if (!hctx) return &bt->bs[0]; - bs = &bt->bs[hctx->wait_index]; - bt_index_inc(&hctx->wait_index); + wait_index = atomic_read(&hctx->wait_index); + bs = &bt->bs[wait_index]; + bt_index_atomic_inc(&hctx->wait_index); return bs; } @@ -313,18 +322,19 @@ static struct bt_wait_state *bt_wake_ptr(struct blk_mq_bitmap_tags *bt) { int i, wake_index; - wake_index = bt->wake_index; + wake_index = atomic_read(&bt->wake_index); for (i = 0; i < BT_WAIT_QUEUES; i++) { struct bt_wait_state *bs = &bt->bs[wake_index]; if (waitqueue_active(&bs->wait)) { - if (wake_index != bt->wake_index) - bt->wake_index = wake_index; + int o = atomic_read(&bt->wake_index); + if (wake_index != o) + atomic_cmpxchg(&bt->wake_index, o, wake_index); return bs; } - bt_index_inc(&wake_index); + wake_index = bt_index_inc(wake_index); } return NULL; @@ -344,7 +354,7 @@ static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag) bs = bt_wake_ptr(bt); if (bs && atomic_dec_and_test(&bs->wait_cnt)) { atomic_set(&bs->wait_cnt, bt->wake_cnt); - bt_index_inc(&bt->wake_index); + bt_index_atomic_inc(&bt->wake_index); wake_up(&bs->wait); } } diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index 98696a65d4d4..6206ed17ef76 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -24,7 +24,7 @@ struct blk_mq_bitmap_tags { unsigned int map_nr; struct blk_align_bitmap *map; - unsigned int wake_index; + atomic_t wake_index; struct bt_wait_state *bs; }; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index a002cf191427..eb726b9c5762 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -42,7 +42,7 @@ struct blk_mq_hw_ctx { unsigned int nr_ctx; struct blk_mq_ctx **ctxs; - unsigned int wait_index; + atomic_t wait_index; struct blk_mq_tags *tags; -- cgit v1.2.3 From 2b8f2a28eac1d35a432705d269f02bdaeba9be8f Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Wed, 18 Jun 2014 11:01:41 +0200 Subject: net: phylib: add link_change_notify callback to phy device Add a notify callback to inform phy drivers when the core is about to do its link adjustment. No change for drivers that do not implement this callback. Signed-off-by: Daniel Mack Signed-off-by: David S. Miller --- drivers/net/phy/phy.c | 3 +++ include/linux/phy.h | 9 +++++++++ 2 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 3bc079a67a3d..f7c61812ea4a 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -720,6 +720,9 @@ void phy_state_machine(struct work_struct *work) mutex_lock(&phydev->lock); + if (phydev->drv->link_change_notify) + phydev->drv->link_change_notify(phydev); + switch (phydev->state) { case PHY_DOWN: case PHY_STARTING: diff --git a/include/linux/phy.h b/include/linux/phy.h index 864ddafad8cc..68041446c450 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -536,6 +536,15 @@ struct phy_driver { /* See set_wol, but for checking whether Wake on LAN is enabled. */ void (*get_wol)(struct phy_device *dev, struct ethtool_wolinfo *wol); + /* + * Called to inform a PHY device driver when the core is about to + * change the link state. This callback is supposed to be used as + * fixup hook for drivers that need to take action when the link + * state changes. Drivers are by no means allowed to mess with the + * PHY device structure in their implementations. + */ + void (*link_change_notify)(struct phy_device *dev); + struct device_driver driver; }; #define to_phy_driver(d) container_of(d, struct phy_driver, driver) -- cgit v1.2.3 From b3acc56bfe1287c6b666e80edc70b89eea2a1a80 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Mon, 23 Jun 2014 13:22:03 -0700 Subject: kexec: save PG_head_mask in VMCOREINFO To allow filtering of huge pages, makedumpfile must be able to identify them in the dump. This can be done by checking the appropriate page flag, so communicate its value to makedumpfile through the VMCOREINFO interface. There's only one small catch. Depending on how many page flags are available on a given architecture, this bit can be called PG_head or PG_compound. I sent a similar patch back in 2012, but Eric Biederman did not like using an #ifdef. So, this time I'm adding a common symbol (PG_head_mask) instead. See https://lkml.org/lkml/2012/11/28/91 for the previous version. Signed-off-by: Petr Tesarik Acked-by: Vivek Goyal Cc: Eric Biederman Cc: Paul Mackerras Cc: Fengguang Wu Cc: Benjamin Herrenschmidt Cc: Shaohua Li Cc: Alexey Kardashevskiy Cc: Sasha Levin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 3 +++ kernel/kexec.c | 1 + 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 3c545b48aeab..8304959ad336 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -360,6 +360,9 @@ static inline void ClearPageCompound(struct page *page) ClearPageHead(page); } #endif + +#define PG_head_mask ((1L << PG_head)) + #else /* * Reduce page flag use as much as possible by overlapping diff --git a/kernel/kexec.c b/kernel/kexec.c index 6748688813d0..369f41a94124 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -1617,6 +1617,7 @@ static int __init crash_save_vmcoreinfo_init(void) #ifdef CONFIG_MEMORY_FAILURE VMCOREINFO_NUMBER(PG_hwpoison); #endif + VMCOREINFO_NUMBER(PG_head_mask); VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE); arch_crash_save_vmcoreinfo(); -- cgit v1.2.3 From f3aca3d09525f87731ba6b892c9b010570bc54b4 Mon Sep 17 00:00:00 2001 From: Aaron Tomlin Date: Mon, 23 Jun 2014 13:22:05 -0700 Subject: nmi: provide the option to issue an NMI back trace to every cpu but current Sometimes it is preferred not to use the trigger_all_cpu_backtrace() routine when one wants to avoid capturing a back trace for current. For instance if one was previously captured recently. This patch provides a new routine namely trigger_allbutself_cpu_backtrace() which offers the flexibility to issue an NMI to every cpu but current and capture a back trace accordingly. Patch x86 and sparc to support new routine. [dzickus@redhat.com: add stub in #else clause] [dzickus@redhat.com: don't print message in single processor case, wrap with get/put_cpu based on Oleg's suggestion] [sfr@canb.auug.org.au: undo C99ism] Signed-off-by: Aaron Tomlin Signed-off-by: Don Zickus Acked-by: David S. Miller Cc: Mateusz Guzik Cc: Oleg Nesterov Signed-off-by: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/sparc/include/asm/irq_64.h | 2 +- arch/sparc/kernel/process_64.c | 18 ++++++++++++------ arch/x86/include/asm/irq.h | 2 +- arch/x86/kernel/apic/hw_nmi.c | 18 ++++++++++++++---- include/linux/nmi.h | 11 ++++++++++- 5 files changed, 38 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h index 375cffcf7dbd..91d219381306 100644 --- a/arch/sparc/include/asm/irq_64.h +++ b/arch/sparc/include/asm/irq_64.h @@ -89,7 +89,7 @@ static inline unsigned long get_softint(void) return retval; } -void arch_trigger_all_cpu_backtrace(void); +void arch_trigger_all_cpu_backtrace(bool); #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace extern void *hardirq_stack[NR_CPUS]; diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index b2988f25e230..027e09986194 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -239,7 +239,7 @@ static void __global_reg_poll(struct global_reg_snapshot *gp) } } -void arch_trigger_all_cpu_backtrace(void) +void arch_trigger_all_cpu_backtrace(bool include_self) { struct thread_info *tp = current_thread_info(); struct pt_regs *regs = get_irq_regs(); @@ -251,16 +251,22 @@ void arch_trigger_all_cpu_backtrace(void) spin_lock_irqsave(&global_cpu_snapshot_lock, flags); - memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot)); - this_cpu = raw_smp_processor_id(); - __global_reg_self(tp, regs, this_cpu); + memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot)); + + if (include_self) + __global_reg_self(tp, regs, this_cpu); smp_fetch_global_regs(); for_each_online_cpu(cpu) { - struct global_reg_snapshot *gp = &global_cpu_snapshot[cpu].reg; + struct global_reg_snapshot *gp; + + if (!include_self && cpu == this_cpu) + continue; + + gp = &global_cpu_snapshot[cpu].reg; __global_reg_poll(gp); @@ -292,7 +298,7 @@ void arch_trigger_all_cpu_backtrace(void) static void sysrq_handle_globreg(int key) { - arch_trigger_all_cpu_backtrace(); + arch_trigger_all_cpu_backtrace(true); } static struct sysrq_key_op sparc_globalreg_op = { diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index cb6cfcd034cf..a80cbb88ea91 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -43,7 +43,7 @@ extern int vector_used_by_percpu_irq(unsigned int vector); extern void init_ISA_irqs(void); #ifdef CONFIG_X86_LOCAL_APIC -void arch_trigger_all_cpu_backtrace(void); +void arch_trigger_all_cpu_backtrace(bool); #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace #endif diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index c3fcb5de5083..6a1e71bde323 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -33,31 +33,41 @@ static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; /* "in progress" flag of arch_trigger_all_cpu_backtrace */ static unsigned long backtrace_flag; -void arch_trigger_all_cpu_backtrace(void) +void arch_trigger_all_cpu_backtrace(bool include_self) { int i; + int cpu = get_cpu(); - if (test_and_set_bit(0, &backtrace_flag)) + if (test_and_set_bit(0, &backtrace_flag)) { /* * If there is already a trigger_all_cpu_backtrace() in progress * (backtrace_flag == 1), don't output double cpu dump infos. */ + put_cpu(); return; + } cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); + if (!include_self) + cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); - printk(KERN_INFO "sending NMI to all CPUs:\n"); - apic->send_IPI_all(NMI_VECTOR); + if (!cpumask_empty(to_cpumask(backtrace_mask))) { + pr_info("sending NMI to %s CPUs:\n", + (include_self ? "all" : "other")); + apic->send_IPI_mask(to_cpumask(backtrace_mask), NMI_VECTOR); + } /* Wait for up to 10 seconds for all CPUs to do the backtrace */ for (i = 0; i < 10 * 1000; i++) { if (cpumask_empty(to_cpumask(backtrace_mask))) break; mdelay(1); + touch_softlockup_watchdog(); } clear_bit(0, &backtrace_flag); smp_mb__after_atomic(); + put_cpu(); } static int diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 6a45fb583ff1..a17ab6398d7c 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -32,15 +32,24 @@ static inline void touch_nmi_watchdog(void) #ifdef arch_trigger_all_cpu_backtrace static inline bool trigger_all_cpu_backtrace(void) { - arch_trigger_all_cpu_backtrace(); + arch_trigger_all_cpu_backtrace(true); return true; } +static inline bool trigger_allbutself_cpu_backtrace(void) +{ + arch_trigger_all_cpu_backtrace(false); + return true; +} #else static inline bool trigger_all_cpu_backtrace(void) { return false; } +static inline bool trigger_allbutself_cpu_backtrace(void) +{ + return false; +} #endif #ifdef CONFIG_LOCKUP_DETECTOR -- cgit v1.2.3 From ed235875e2ca983197831337a986f0517074e1a0 Mon Sep 17 00:00:00 2001 From: Aaron Tomlin Date: Mon, 23 Jun 2014 13:22:05 -0700 Subject: kernel/watchdog.c: print traces for all cpus on lockup detection A 'softlockup' is defined as a bug that causes the kernel to loop in kernel mode for more than a predefined period to time, without giving other tasks a chance to run. Currently, upon detection of this condition by the per-cpu watchdog task, debug information (including a stack trace) is sent to the system log. On some occasions, we have observed that the "victim" rather than the actual "culprit" (i.e. the owner/holder of the contended resource) is reported to the user. Often this information has proven to be insufficient to assist debugging efforts. To avoid loss of useful debug information, for architectures which support NMI, this patch makes it possible to improve soft lockup reporting. This is accomplished by issuing an NMI to each cpu to obtain a stack trace. If NMI is not supported we just revert back to the old method. A sysctl and boot-time parameter is available to toggle this feature. [dzickus@redhat.com: add CONFIG_SMP in certain areas] [akpm@linux-foundation.org: additional CONFIG_SMP=n optimisations] [mq@suse.cz: fix warning] Signed-off-by: Aaron Tomlin Signed-off-by: Don Zickus Cc: David S. Miller Cc: Mateusz Guzik Cc: Oleg Nesterov Signed-off-by: Jan Moskyto Matejka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 5 +++++ Documentation/sysctl/kernel.txt | 17 ++++++++++++++++ include/linux/nmi.h | 1 + kernel/sysctl.c | 11 +++++++++++ kernel/watchdog.c | 39 +++++++++++++++++++++++++++++++++++++ 5 files changed, 73 insertions(+) (limited to 'include/linux') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 884904975d0b..c1b9aa8c5a52 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3130,6 +3130,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. [KNL] Should the soft-lockup detector generate panics. Format: + softlockup_all_cpu_backtrace= + [KNL] Should the soft-lockup detector generate + backtraces on all cpus. + Format: + sonypi.*= [HW] Sony Programmable I/O Control Device driver See Documentation/laptops/sonypi.txt diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 708bb7f1b7e0..c14374e71775 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -75,6 +75,7 @@ show up in /proc/sys/kernel: - shmall - shmmax [ sysv ipc ] - shmmni +- softlockup_all_cpu_backtrace - stop-a [ SPARC only ] - sysrq ==> Documentation/sysrq.txt - sysctl_writes_strict @@ -783,6 +784,22 @@ via the /proc/sys interface: ============================================================== +softlockup_all_cpu_backtrace: + +This value controls the soft lockup detector thread's behavior +when a soft lockup condition is detected as to whether or not +to gather further debug information. If enabled, each cpu will +be issued an NMI and instructed to capture stack trace. + +This feature is only applicable for architectures which support +NMI. + +0: do nothing. This is the default behavior. + +1: on detection capture more debug information. + +============================================================== + tainted: Non-zero if the kernel has been tainted. Numeric values, which diff --git a/include/linux/nmi.h b/include/linux/nmi.h index a17ab6398d7c..447775ee2c4b 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -57,6 +57,7 @@ int hw_nmi_is_cpu_stuck(struct pt_regs *); u64 hw_nmi_get_sample_period(int watchdog_thresh); extern int watchdog_user_enabled; extern int watchdog_thresh; +extern int sysctl_softlockup_all_cpu_backtrace; struct ctl_table; extern int proc_dowatchdog(struct ctl_table *, int , void __user *, size_t *, loff_t *); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 075d1903138f..75b22e22a72c 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -860,6 +860,17 @@ static struct ctl_table kern_table[] = { .extra1 = &zero, .extra2 = &one, }, +#ifdef CONFIG_SMP + { + .procname = "softlockup_all_cpu_backtrace", + .data = &sysctl_softlockup_all_cpu_backtrace, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, +#endif /* CONFIG_SMP */ { .procname = "nmi_watchdog", .data = &watchdog_user_enabled, diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 30e482240dae..c3319bd1b040 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -31,6 +31,12 @@ int watchdog_user_enabled = 1; int __read_mostly watchdog_thresh = 10; +#ifdef CONFIG_SMP +int __read_mostly sysctl_softlockup_all_cpu_backtrace; +#else +#define sysctl_softlockup_all_cpu_backtrace 0 +#endif + static int __read_mostly watchdog_running; static u64 __read_mostly sample_period; @@ -47,6 +53,7 @@ static DEFINE_PER_CPU(bool, watchdog_nmi_touch); static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); #endif +static unsigned long soft_lockup_nmi_warn; /* boot commands */ /* @@ -95,6 +102,15 @@ static int __init nosoftlockup_setup(char *str) } __setup("nosoftlockup", nosoftlockup_setup); /* */ +#ifdef CONFIG_SMP +static int __init softlockup_all_cpu_backtrace_setup(char *str) +{ + sysctl_softlockup_all_cpu_backtrace = + !!simple_strtol(str, NULL, 0); + return 1; +} +__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup); +#endif /* * Hard-lockup warnings should be triggered after just a few seconds. Soft- @@ -271,6 +287,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts); struct pt_regs *regs = get_irq_regs(); int duration; + int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace; /* kick the hardlockup detector */ watchdog_interrupt_count(); @@ -317,6 +334,17 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) if (__this_cpu_read(soft_watchdog_warn) == true) return HRTIMER_RESTART; + if (softlockup_all_cpu_backtrace) { + /* Prevent multiple soft-lockup reports if one cpu is already + * engaged in dumping cpu back traces + */ + if (test_and_set_bit(0, &soft_lockup_nmi_warn)) { + /* Someone else will report us. Let's give up */ + __this_cpu_write(soft_watchdog_warn, true); + return HRTIMER_RESTART; + } + } + printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n", smp_processor_id(), duration, current->comm, task_pid_nr(current)); @@ -327,6 +355,17 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) else dump_stack(); + if (softlockup_all_cpu_backtrace) { + /* Avoid generating two back traces for current + * given that one is already made above + */ + trigger_allbutself_cpu_backtrace(); + + clear_bit(0, &soft_lockup_nmi_warn); + /* Barrier to sync with other cpus */ + smp_mb__after_atomic(); + } + if (softlockup_panic) panic("softlockup: hung tasks"); __this_cpu_write(soft_watchdog_warn, true); -- cgit v1.2.3