From 33c35aa4817864e056fd772230b0c6b552e36ea2 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Mon, 15 May 2017 09:34:06 -0400 Subject: cgroup: Prevent kill_css() from being called more than once The kill_css() function may be called more than once under the condition that the css was killed but not physically removed yet followed by the removal of the cgroup that is hosting the css. This patch prevents any harmm from being done when that happens. Signed-off-by: Waiman Long Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org # v4.5+ --- include/linux/cgroup-defs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 21745946cae1..ec47101cb1bf 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -48,6 +48,7 @@ enum { CSS_ONLINE = (1 << 1), /* between ->css_online() and ->css_offline() */ CSS_RELEASED = (1 << 2), /* refcnt reached zero, released */ CSS_VISIBLE = (1 << 3), /* css is visible to userland */ + CSS_DYING = (1 << 4), /* css is dying */ }; /* bits in struct cgroup flags field */ -- cgit v1.2.3 From 41c25707d21716826e3c1f60967f5550610ec1c9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 May 2017 12:03:48 -0400 Subject: cpuset: consider dying css as offline In most cases, a cgroup controller don't care about the liftimes of cgroups. For the controller, a css becomes online when ->css_online() is called on it and offline when ->css_offline() is called. However, cpuset is special in that the user interface it exposes cares whether certain cgroups exist or not. Combined with the RCU delay between cgroup removal and css offlining, this can lead to user visible behavior oddities where operations which should succeed after cgroup removals fail for some time period. The effects of cgroup removals are delayed when seen from userland. This patch adds css_is_dying() which tests whether offline is pending and updates is_cpuset_online() so that the function returns false also while offline is pending. This gets rid of the userland visible delays. Signed-off-by: Tejun Heo Reported-by: Daniel Jordan Link: http://lkml.kernel.org/r/327ca1f5-7957-fbb9-9e5f-9ba149d40ba2@oracle.com Cc: stable@vger.kernel.org Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 20 ++++++++++++++++++++ kernel/cgroup/cpuset.c | 4 ++-- 2 files changed, 22 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index ed2573e149fa..710a005c6b7a 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -343,6 +343,26 @@ static inline bool css_tryget_online(struct cgroup_subsys_state *css) return true; } +/** + * css_is_dying - test whether the specified css is dying + * @css: target css + * + * Test whether @css is in the process of offlining or already offline. In + * most cases, ->css_online() and ->css_offline() callbacks should be + * enough; however, the actual offline operations are RCU delayed and this + * test returns %true also when @css is scheduled to be offlined. + * + * This is useful, for example, when the use case requires synchronous + * behavior with respect to cgroup removal. cgroup removal schedules css + * offlining but the css can seem alive while the operation is being + * delayed. If the delay affects user visible semantics, this test can be + * used to resolve the situation. + */ +static inline bool css_is_dying(struct cgroup_subsys_state *css) +{ + return !(css->flags & CSS_NO_REF) && percpu_ref_is_dying(&css->refcnt); +} + /** * css_put - put a css reference * @css: target css diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index f6501f4f6040..ae643412948a 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -176,9 +176,9 @@ typedef enum { } cpuset_flagbits_t; /* convenient tests for these bits */ -static inline bool is_cpuset_online(const struct cpuset *cs) +static inline bool is_cpuset_online(struct cpuset *cs) { - return test_bit(CS_ONLINE, &cs->flags); + return test_bit(CS_ONLINE, &cs->flags) && !css_is_dying(&cs->css); } static inline int is_cpu_exclusive(const struct cpuset *cs) -- cgit v1.2.3 From 4b1c88984c8ac894c2c411570757bed7fa5f3226 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 18 May 2017 15:13:57 +0200 Subject: iommu/dma: Fix function declaration Newly added code in the ipmmu-vmsa driver showed a small mistake in a header file that can't be included by itself without CONFIG_IOMMU_DMA enabled: In file included from drivers/iommu/ipmmu-vmsa.c:13:0: include/linux/dma-iommu.h:105:94: error: 'struct device' declared inside parameter list will not be visible outside of this definition or declaration [-Werror] This adds a forward declaration for 'struct device', similar to how we treat the other struct types in this case. Fixes: 3ae47292024f ("iommu/ipmmu-vmsa: Add new IOMMU_DOMAIN_DMA ops") Fixes: 273df9635385 ("iommu/dma: Make PCI window reservation generic") Signed-off-by: Arnd Bergmann Acked-by: Robin Murphy Signed-off-by: Joerg Roedel --- include/linux/dma-iommu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index 4eac2670bfa1..92f20832fd28 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -78,6 +78,7 @@ void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list); struct iommu_domain; struct msi_msg; +struct device; static inline int iommu_dma_init(void) { -- cgit v1.2.3 From 6dc06c08bef1c746ff8da33dab677cfbacdcad32 Mon Sep 17 00:00:00 2001 From: Talat Batheesh Date: Sun, 4 Jun 2017 14:30:07 +0300 Subject: net/mlx4: Fix the check in attaching steering rules Our previous patch (cited below) introduced a regression for RAW Eth QPs. Fix it by checking if the QP number provided by user-space exists, hence allowing steering rules to be added for valid QPs only. Fixes: 89c557687a32 ("net/mlx4_en: Avoid adding steering rules with invalid ring") Reported-by: Or Gerlitz Signed-off-by: Talat Batheesh Signed-off-by: Tariq Toukan Acked-by: Or Gerlitz Reviewed-by: Leon Romanovsky Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 5 ----- drivers/net/ethernet/mellanox/mlx4/mcg.c | 15 +++++++++++---- drivers/net/ethernet/mellanox/mlx4/qp.c | 13 +++++++++++++ include/linux/mlx4/qp.h | 1 + 4 files changed, 25 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index ae5fdc2df654..ffbcb27c05e5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -1562,11 +1562,6 @@ static int mlx4_en_flow_replace(struct net_device *dev, qpn = priv->drop_qp.qpn; else if (cmd->fs.ring_cookie & EN_ETHTOOL_QP_ATTACH) { qpn = cmd->fs.ring_cookie & (EN_ETHTOOL_QP_ATTACH - 1); - if (qpn < priv->rss_map.base_qpn || - qpn >= priv->rss_map.base_qpn + priv->rx_ring_num) { - en_warn(priv, "rxnfc: QP (0x%x) doesn't exist\n", qpn); - return -EINVAL; - } } else { if (cmd->fs.ring_cookie >= priv->rx_ring_num) { en_warn(priv, "rxnfc: RX ring (%llu) doesn't exist\n", diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index 1a670b681555..0710b3677464 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -35,6 +35,7 @@ #include #include +#include #include #include "mlx4.h" @@ -985,16 +986,21 @@ int mlx4_flow_attach(struct mlx4_dev *dev, if (IS_ERR(mailbox)) return PTR_ERR(mailbox); + if (!mlx4_qp_lookup(dev, rule->qpn)) { + mlx4_err_rule(dev, "QP doesn't exist\n", rule); + ret = -EINVAL; + goto out; + } + trans_rule_ctrl_to_hw(rule, mailbox->buf); size += sizeof(struct mlx4_net_trans_rule_hw_ctrl); list_for_each_entry(cur, &rule->list, list) { ret = parse_trans_rule(dev, cur, mailbox->buf + size); - if (ret < 0) { - mlx4_free_cmd_mailbox(dev, mailbox); - return ret; - } + if (ret < 0) + goto out; + size += ret; } @@ -1021,6 +1027,7 @@ int mlx4_flow_attach(struct mlx4_dev *dev, } } +out: mlx4_free_cmd_mailbox(dev, mailbox); return ret; diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 2d6abd4662b1..ad92d2311478 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -384,6 +384,19 @@ static void mlx4_qp_free_icm(struct mlx4_dev *dev, int qpn) __mlx4_qp_free_icm(dev, qpn); } +struct mlx4_qp *mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn) +{ + struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table; + struct mlx4_qp *qp; + + spin_lock(&qp_table->lock); + + qp = __mlx4_qp_lookup(dev, qpn); + + spin_unlock(&qp_table->lock); + return qp; +} + int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp, gfp_t gfp) { struct mlx4_priv *priv = mlx4_priv(dev); diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index b4ee8f62ce8d..8e2828d48d7f 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -470,6 +470,7 @@ struct mlx4_update_qp_params { u16 rate_val; }; +struct mlx4_qp *mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn); int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn, enum mlx4_update_qp_attr attr, struct mlx4_update_qp_params *params); -- cgit v1.2.3 From 9bd2bbc01d17ddd567cc0f81f77fe1163e497462 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 2 Jun 2017 20:35:51 -0700 Subject: elevator: fix truncation of icq_cache_name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gcc 7.1 reports the following warning: block/elevator.c: In function ‘elv_register’: block/elevator.c:898:5: warning: ‘snprintf’ output may be truncated before the last format character [-Wformat-truncation=] "%s_io_cq", e->elevator_name); ^~~~~~~~~~ block/elevator.c:897:3: note: ‘snprintf’ output between 7 and 22 bytes into a destination of size 21 snprintf(e->icq_cache_name, sizeof(e->icq_cache_name), ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "%s_io_cq", e->elevator_name); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The bug is that the name of the icq_cache is 6 characters longer than the elevator name, but only ELV_NAME_MAX + 5 characters were reserved for it --- so in the case of a maximum-length elevator name, the 'q' character in "_io_cq" would be truncated by snprintf(). Fix it by reserving ELV_NAME_MAX + 6 characters instead. Signed-off-by: Eric Biggers Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/elevator.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 9ec5e22846e0..0e306c5a86d6 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -153,7 +153,7 @@ struct elevator_type #endif /* managed by elevator core */ - char icq_cache_name[ELV_NAME_MAX + 5]; /* elvname + "_io_cq" */ + char icq_cache_name[ELV_NAME_MAX + 6]; /* elvname + "_io_cq" */ struct list_head list; }; -- cgit v1.2.3 From abb2ea7dfd82451d85ce669b811310c05ab5ca46 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Tue, 6 Jun 2017 13:36:24 -0700 Subject: compiler, clang: suppress warning for unused static inline functions GCC explicitly does not warn for unused static inline functions for -Wunused-function. The manual states: Warn whenever a static function is declared but not defined or a non-inline static function is unused. Clang does warn for static inline functions that are unused. It turns out that suppressing the warnings avoids potentially complex #ifdef directives, which also reduces LOC. Suppress the warning for clang. Signed-off-by: David Rientjes Signed-off-by: Linus Torvalds --- include/linux/compiler-clang.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index de179993e039..ea9126006a69 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -15,3 +15,10 @@ * with any version that can compile the kernel */ #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) + +/* + * GCC does not warn about unused static inline functions for + * -Wunused-function. This turns out to avoid the need for complex #ifdef + * directives. Suppress the warning in clang as well. + */ +#define inline inline __attribute__((unused)) -- cgit v1.2.3 From f3b7eaae1b35eb8077610eb7c7db042c9b0645e1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 7 Jun 2017 00:57:37 +0200 Subject: Revert "ACPI / sleep: Ignore spurious SCI wakeups from suspend-to-idle" Revert commit eed4d47efe95 (ACPI / sleep: Ignore spurious SCI wakeups from suspend-to-idle) as it turned out to be premature and triggered a number of different issues on various systems. That includes, but is not limited to, premature suspend-to-RAM aborts on Dell XPS 13 (9343) reported by Dominik. The issue the commit in question attempted to address is real and will need to be taken care of going forward, but evidently more work is needed for this purpose. Reported-by: Dominik Brodowski Signed-off-by: Rafael J. Wysocki --- drivers/acpi/battery.c | 2 +- drivers/acpi/button.c | 5 ++--- drivers/acpi/device_pm.c | 3 +-- drivers/acpi/sleep.c | 28 ---------------------------- drivers/base/power/main.c | 5 +++++ drivers/base/power/wakeup.c | 18 ++++++------------ include/linux/suspend.h | 7 ++----- kernel/power/process.c | 2 +- kernel/power/suspend.c | 29 ++++------------------------- 9 files changed, 22 insertions(+), 77 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index 83ab17e4a795..4ef1e4624b2b 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -776,7 +776,7 @@ static int acpi_battery_update(struct acpi_battery *battery, bool resume) if ((battery->state & ACPI_BATTERY_STATE_CRITICAL) || (test_bit(ACPI_BATTERY_ALARM_PRESENT, &battery->flags) && (battery->capacity_now <= battery->alarm))) - pm_wakeup_hard_event(&battery->device->dev); + pm_wakeup_event(&battery->device->dev, 0); return result; } diff --git a/drivers/acpi/button.c b/drivers/acpi/button.c index b7c2a06963d6..668137e4a069 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -216,7 +216,7 @@ static int acpi_lid_notify_state(struct acpi_device *device, int state) } if (state) - pm_wakeup_hard_event(&device->dev); + pm_wakeup_event(&device->dev, 0); ret = blocking_notifier_call_chain(&acpi_lid_notifier, state, device); if (ret == NOTIFY_DONE) @@ -398,7 +398,7 @@ static void acpi_button_notify(struct acpi_device *device, u32 event) } else { int keycode; - pm_wakeup_hard_event(&device->dev); + pm_wakeup_event(&device->dev, 0); if (button->suspended) break; @@ -530,7 +530,6 @@ static int acpi_button_add(struct acpi_device *device) lid_device = device; } - device_init_wakeup(&device->dev, true); printk(KERN_INFO PREFIX "%s [%s]\n", name, acpi_device_bid(device)); return 0; diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 798d5003a039..993fd31394c8 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -24,7 +24,6 @@ #include #include #include -#include #include "internal.h" @@ -400,7 +399,7 @@ static void acpi_pm_notify_handler(acpi_handle handle, u32 val, void *not_used) mutex_lock(&acpi_pm_notifier_lock); if (adev->wakeup.flags.notifier_present) { - pm_wakeup_ws_event(adev->wakeup.ws, 0, true); + __pm_wakeup_event(adev->wakeup.ws, 0); if (adev->wakeup.context.work.func) queue_pm_work(&adev->wakeup.context.work); } diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index e84005d642e6..a4327af676fe 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -662,40 +662,14 @@ static int acpi_freeze_prepare(void) acpi_os_wait_events_complete(); if (acpi_sci_irq_valid()) enable_irq_wake(acpi_sci_irq); - return 0; } -static void acpi_freeze_wake(void) -{ - /* - * If IRQD_WAKEUP_ARMED is not set for the SCI at this point, it means - * that the SCI has triggered while suspended, so cancel the wakeup in - * case it has not been a wakeup event (the GPEs will be checked later). - */ - if (acpi_sci_irq_valid() && - !irqd_is_wakeup_armed(irq_get_irq_data(acpi_sci_irq))) - pm_system_cancel_wakeup(); -} - -static void acpi_freeze_sync(void) -{ - /* - * Process all pending events in case there are any wakeup ones. - * - * The EC driver uses the system workqueue, so that one needs to be - * flushed too. - */ - acpi_os_wait_events_complete(); - flush_scheduled_work(); -} - static void acpi_freeze_restore(void) { acpi_disable_wakeup_devices(ACPI_STATE_S0); if (acpi_sci_irq_valid()) disable_irq_wake(acpi_sci_irq); - acpi_enable_all_runtime_gpes(); } @@ -707,8 +681,6 @@ static void acpi_freeze_end(void) static const struct platform_freeze_ops acpi_freeze_ops = { .begin = acpi_freeze_begin, .prepare = acpi_freeze_prepare, - .wake = acpi_freeze_wake, - .sync = acpi_freeze_sync, .restore = acpi_freeze_restore, .end = acpi_freeze_end, }; diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index e987a6f55d36..9faee1c893e5 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1091,6 +1091,11 @@ static int __device_suspend_noirq(struct device *dev, pm_message_t state, bool a if (async_error) goto Complete; + if (pm_wakeup_pending()) { + async_error = -EBUSY; + goto Complete; + } + if (dev->power.syscore || dev->power.direct_complete) goto Complete; diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 9c36b27996fc..c313b600d356 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -28,8 +28,8 @@ bool events_check_enabled __read_mostly; /* First wakeup IRQ seen by the kernel in the last cycle. */ unsigned int pm_wakeup_irq __read_mostly; -/* If greater than 0 and the system is suspending, terminate the suspend. */ -static atomic_t pm_abort_suspend __read_mostly; +/* If set and the system is suspending, terminate the suspend. */ +static bool pm_abort_suspend __read_mostly; /* * Combined counters of registered wakeup events and wakeup events in progress. @@ -855,26 +855,20 @@ bool pm_wakeup_pending(void) pm_print_active_wakeup_sources(); } - return ret || atomic_read(&pm_abort_suspend) > 0; + return ret || pm_abort_suspend; } void pm_system_wakeup(void) { - atomic_inc(&pm_abort_suspend); + pm_abort_suspend = true; freeze_wake(); } EXPORT_SYMBOL_GPL(pm_system_wakeup); -void pm_system_cancel_wakeup(void) -{ - atomic_dec(&pm_abort_suspend); -} - -void pm_wakeup_clear(bool reset) +void pm_wakeup_clear(void) { + pm_abort_suspend = false; pm_wakeup_irq = 0; - if (reset) - atomic_set(&pm_abort_suspend, 0); } void pm_system_irq_wakeup(unsigned int irq_number) diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 0b1cf32edfd7..d9718378a8be 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -189,8 +189,6 @@ struct platform_suspend_ops { struct platform_freeze_ops { int (*begin)(void); int (*prepare)(void); - void (*wake)(void); - void (*sync)(void); void (*restore)(void); void (*end)(void); }; @@ -430,8 +428,7 @@ extern unsigned int pm_wakeup_irq; extern bool pm_wakeup_pending(void); extern void pm_system_wakeup(void); -extern void pm_system_cancel_wakeup(void); -extern void pm_wakeup_clear(bool reset); +extern void pm_wakeup_clear(void); extern void pm_system_irq_wakeup(unsigned int irq_number); extern bool pm_get_wakeup_count(unsigned int *count, bool block); extern bool pm_save_wakeup_count(unsigned int count); @@ -481,7 +478,7 @@ static inline int unregister_pm_notifier(struct notifier_block *nb) static inline bool pm_wakeup_pending(void) { return false; } static inline void pm_system_wakeup(void) {} -static inline void pm_wakeup_clear(bool reset) {} +static inline void pm_wakeup_clear(void) {} static inline void pm_system_irq_wakeup(unsigned int irq_number) {} static inline void lock_system_sleep(void) {} diff --git a/kernel/power/process.c b/kernel/power/process.c index 78672d324a6e..c7209f060eeb 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -132,7 +132,7 @@ int freeze_processes(void) if (!pm_freezing) atomic_inc(&system_freezing_cnt); - pm_wakeup_clear(true); + pm_wakeup_clear(); pr_info("Freezing user space processes ... "); pm_freezing = true; error = try_to_freeze_tasks(true); diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index c0248c74d6d4..15e6baef5c73 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -72,8 +72,6 @@ static void freeze_begin(void) static void freeze_enter(void) { - trace_suspend_resume(TPS("machine_suspend"), PM_SUSPEND_FREEZE, true); - spin_lock_irq(&suspend_freeze_lock); if (pm_wakeup_pending()) goto out; @@ -100,27 +98,6 @@ static void freeze_enter(void) out: suspend_freeze_state = FREEZE_STATE_NONE; spin_unlock_irq(&suspend_freeze_lock); - - trace_suspend_resume(TPS("machine_suspend"), PM_SUSPEND_FREEZE, false); -} - -static void s2idle_loop(void) -{ - do { - freeze_enter(); - - if (freeze_ops && freeze_ops->wake) - freeze_ops->wake(); - - dpm_resume_noirq(PMSG_RESUME); - if (freeze_ops && freeze_ops->sync) - freeze_ops->sync(); - - if (pm_wakeup_pending()) - break; - - pm_wakeup_clear(false); - } while (!dpm_suspend_noirq(PMSG_SUSPEND)); } void freeze_wake(void) @@ -394,8 +371,10 @@ static int suspend_enter(suspend_state_t state, bool *wakeup) * all the devices are suspended. */ if (state == PM_SUSPEND_FREEZE) { - s2idle_loop(); - goto Platform_early_resume; + trace_suspend_resume(TPS("machine_suspend"), state, true); + freeze_enter(); + trace_suspend_resume(TPS("machine_suspend"), state, false); + goto Platform_wake; } error = disable_nonboot_cpus(); -- cgit v1.2.3 From 1123a6041654e8f889014659593bad4168e542c2 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 31 May 2017 14:03:11 +0200 Subject: srcu: Allow use of Classic SRCU from both process and interrupt context Linu Cherian reported a WARN in cleanup_srcu_struct() when shutting down a guest running iperf on a VFIO assigned device. This happens because irqfd_wakeup() calls srcu_read_lock(&kvm->irq_srcu) in interrupt context, while a worker thread does the same inside kvm_set_irq(). If the interrupt happens while the worker thread is executing __srcu_read_lock(), updates to the Classic SRCU ->lock_count[] field or the Tree SRCU ->srcu_lock_count[] field can be lost. The docs say you are not supposed to call srcu_read_lock() and srcu_read_unlock() from irq context, but KVM interrupt injection happens from (host) interrupt context and it would be nice if SRCU supported the use case. KVM is using SRCU here not really for the "sleepable" part, but rather due to its IPI-free fast detection of grace periods. It is therefore not desirable to switch back to RCU, which would effectively revert commit 719d93cd5f5c ("kvm/irqchip: Speed up KVM_SET_GSI_ROUTING", 2014-01-16). However, the docs are overly conservative. You can have an SRCU instance only has users in irq context, and you can mix process and irq context as long as process context users disable interrupts. In addition, __srcu_read_unlock() actually uses this_cpu_dec() on both Tree SRCU and Classic SRCU. For those two implementations, only srcu_read_lock() is unsafe. When Classic SRCU's __srcu_read_unlock() was changed to use this_cpu_dec(), in commit 5a41344a3d83 ("srcu: Simplify __srcu_read_unlock() via this_cpu_dec()", 2012-11-29), __srcu_read_lock() did two increments. Therefore it kept __this_cpu_inc(), with preempt_disable/enable in the caller. Tree SRCU however only does one increment, so on most architectures it is more efficient for __srcu_read_lock() to use this_cpu_inc(), and any performance differences appear to be down in the noise. Cc: stable@vger.kernel.org Fixes: 719d93cd5f5c ("kvm/irqchip: Speed up KVM_SET_GSI_ROUTING") Reported-by: Linu Cherian Suggested-by: Linu Cherian Cc: kvm@vger.kernel.org Signed-off-by: Paolo Bonzini Cc: Linus Torvalds Signed-off-by: Paul E. McKenney --- include/linux/srcu.h | 2 -- kernel/rcu/srcu.c | 5 ++--- 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 167ad8831aaf..4c1d5f7e62c4 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -172,9 +172,7 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) { int retval; - preempt_disable(); retval = __srcu_read_lock(sp); - preempt_enable(); rcu_lock_acquire(&(sp)->dep_map); return retval; } diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c index 584d8a983883..dea03614263f 100644 --- a/kernel/rcu/srcu.c +++ b/kernel/rcu/srcu.c @@ -263,7 +263,7 @@ EXPORT_SYMBOL_GPL(cleanup_srcu_struct); /* * Counts the new reader in the appropriate per-CPU element of the - * srcu_struct. Must be called from process context. + * srcu_struct. * Returns an index that must be passed to the matching srcu_read_unlock(). */ int __srcu_read_lock(struct srcu_struct *sp) @@ -271,7 +271,7 @@ int __srcu_read_lock(struct srcu_struct *sp) int idx; idx = READ_ONCE(sp->completed) & 0x1; - __this_cpu_inc(sp->per_cpu_ref->lock_count[idx]); + this_cpu_inc(sp->per_cpu_ref->lock_count[idx]); smp_mb(); /* B */ /* Avoid leaking the critical section. */ return idx; } @@ -281,7 +281,6 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock); * Removes the count for the old reader from the appropriate per-CPU * element of the srcu_struct. Note that this may well be a different * CPU than that which was incremented by the corresponding srcu_read_lock(). - * Must be called from process context. */ void __srcu_read_unlock(struct srcu_struct *sp, int idx) { -- cgit v1.2.3