From 0b04d4c0542e8573a837b1d81b94209e48723b25 Mon Sep 17 00:00:00 2001 From: Douglas Raillard Date: Mon, 6 Mar 2023 12:25:49 +0000 Subject: f2fs: Fix f2fs_truncate_partial_nodes ftrace event Fix the nid_t field so that its size is correctly reported in the text format embedded in trace.dat files. As it stands, it is reported as being of size 4: field:nid_t nid[3]; offset:24; size:4; signed:0; Instead of 12: field:nid_t nid[3]; offset:24; size:12; signed:0; This also fixes the reported offset of subsequent fields so that they match with the actual struct layout. Signed-off-by: Douglas Raillard Reviewed-by: Mukesh Ojha Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/trace/events/f2fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 1322d34a5dfc..99cbc5949e3c 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -512,7 +512,7 @@ TRACE_EVENT(f2fs_truncate_partial_nodes, TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) - __field(nid_t, nid[3]) + __array(nid_t, nid, 3) __field(int, depth) __field(int, err) ), -- cgit v1.2.3 From 8b3a149db461d3286d1e211112de3b44ccaeaf71 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 12 Mar 2023 23:00:03 +0100 Subject: efi: earlycon: Reprobe after parsing config tables Commit 732ea9db9d8a ("efi: libstub: Move screen_info handling to common code") reorganized the earlycon handling so that all architectures pass the screen_info data via a EFI config table instead of populating struct screen_info directly, as the latter is only possible when the EFI stub is baked into the kernel (and not into the decompressor). However, this means that struct screen_info may not have been populated yet by the time the earlycon probe takes place, and this results in a non-functional early console. So let's probe again right after parsing the config tables and populating struct screen_info. Note that this means that earlycon output starts a bit later than before, and so it may fail to capture issues that occur while doing the early EFI initialization. Fixes: 732ea9db9d8a ("efi: libstub: Move screen_info handling to common code") Reported-by: Shawn Guo Tested-by: Shawn Guo Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/earlycon.c | 16 +++++++++++++--- drivers/firmware/efi/efi-init.c | 3 +++ include/linux/efi.h | 1 + 3 files changed, 17 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/firmware/efi/earlycon.c b/drivers/firmware/efi/earlycon.c index f54e6fdf08e2..f80a9af3d16e 100644 --- a/drivers/firmware/efi/earlycon.c +++ b/drivers/firmware/efi/earlycon.c @@ -215,6 +215,14 @@ efi_earlycon_write(struct console *con, const char *str, unsigned int num) } } +static bool __initdata fb_probed; + +void __init efi_earlycon_reprobe(void) +{ + if (fb_probed) + setup_earlycon("efifb"); +} + static int __init efi_earlycon_setup(struct earlycon_device *device, const char *opt) { @@ -222,15 +230,17 @@ static int __init efi_earlycon_setup(struct earlycon_device *device, u16 xres, yres; u32 i; - if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI) + fb_wb = opt && !strcmp(opt, "ram"); + + if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI) { + fb_probed = true; return -ENODEV; + } fb_base = screen_info.lfb_base; if (screen_info.capabilities & VIDEO_CAPABILITY_64BIT_BASE) fb_base |= (u64)screen_info.ext_lfb_base << 32; - fb_wb = opt && !strcmp(opt, "ram"); - si = &screen_info; xres = si->lfb_width; yres = si->lfb_height; diff --git a/drivers/firmware/efi/efi-init.c b/drivers/firmware/efi/efi-init.c index 2c16080e1f71..ef0820f1a924 100644 --- a/drivers/firmware/efi/efi-init.c +++ b/drivers/firmware/efi/efi-init.c @@ -72,6 +72,9 @@ static void __init init_screen_info(void) if (memblock_is_map_memory(screen_info.lfb_base)) memblock_mark_nomap(screen_info.lfb_base, screen_info.lfb_size); + + if (IS_ENABLED(CONFIG_EFI_EARLYCON)) + efi_earlycon_reprobe(); } } diff --git a/include/linux/efi.h b/include/linux/efi.h index 04a733f0ba95..7aa62c92185f 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -693,6 +693,7 @@ efi_guid_to_str(efi_guid_t *guid, char *out) } extern void efi_init (void); +extern void efi_earlycon_reprobe(void); #ifdef CONFIG_EFI extern void efi_enter_virtual_mode (void); /* switch EFI to virtual mode, if possible */ #else -- cgit v1.2.3 From 7ff84910c66c9144cc0de9d9deed9fb84c03aff0 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 14 Mar 2023 06:20:58 -0400 Subject: lockd: set file_lock start and end when decoding nlm4 testargs Commit 6930bcbfb6ce dropped the setting of the file_lock range when decoding a nlm_lock off the wire. This causes the client side grant callback to miss matching blocks and reject the lock, only to rerequest it 30s later. Add a helper function to set the file_lock range from the start and end values that the protocol uses, and have the nlm_lock decoder call that to set up the file_lock args properly. Fixes: 6930bcbfb6ce ("lockd: detect and reject lock arguments that overflow") Reported-by: Amir Goldstein Signed-off-by: Jeff Layton Tested-by: Amir Goldstein Cc: stable@vger.kernel.org #6.0 Signed-off-by: Anna Schumaker --- fs/lockd/clnt4xdr.c | 9 +-------- fs/lockd/xdr4.c | 13 ++++++++++++- include/linux/lockd/xdr4.h | 1 + 3 files changed, 14 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c index 7df6324ccb8a..8161667c976f 100644 --- a/fs/lockd/clnt4xdr.c +++ b/fs/lockd/clnt4xdr.c @@ -261,7 +261,6 @@ static int decode_nlm4_holder(struct xdr_stream *xdr, struct nlm_res *result) u32 exclusive; int error; __be32 *p; - s32 end; memset(lock, 0, sizeof(*lock)); locks_init_lock(fl); @@ -285,13 +284,7 @@ static int decode_nlm4_holder(struct xdr_stream *xdr, struct nlm_res *result) fl->fl_type = exclusive != 0 ? F_WRLCK : F_RDLCK; p = xdr_decode_hyper(p, &l_offset); xdr_decode_hyper(p, &l_len); - end = l_offset + l_len - 1; - - fl->fl_start = (loff_t)l_offset; - if (l_len == 0 || end < 0) - fl->fl_end = OFFSET_MAX; - else - fl->fl_end = (loff_t)end; + nlm4svc_set_file_lock_range(fl, l_offset, l_len); error = 0; out: return error; diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index 712fdfeb8ef0..5fcbf30cd275 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -33,6 +33,17 @@ loff_t_to_s64(loff_t offset) return res; } +void nlm4svc_set_file_lock_range(struct file_lock *fl, u64 off, u64 len) +{ + s64 end = off + len - 1; + + fl->fl_start = off; + if (len == 0 || end < 0) + fl->fl_end = OFFSET_MAX; + else + fl->fl_end = end; +} + /* * NLM file handles are defined by specification to be a variable-length * XDR opaque no longer than 1024 bytes. However, this implementation @@ -80,7 +91,7 @@ svcxdr_decode_lock(struct xdr_stream *xdr, struct nlm_lock *lock) locks_init_lock(fl); fl->fl_flags = FL_POSIX; fl->fl_type = F_RDLCK; - + nlm4svc_set_file_lock_range(fl, lock->lock_start, lock->lock_len); return true; } diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h index 9a6b55da8fd6..72831e35dca3 100644 --- a/include/linux/lockd/xdr4.h +++ b/include/linux/lockd/xdr4.h @@ -22,6 +22,7 @@ #define nlm4_fbig cpu_to_be32(NLM_FBIG) #define nlm4_failed cpu_to_be32(NLM_FAILED) +void nlm4svc_set_file_lock_range(struct file_lock *fl, u64 off, u64 len); bool nlm4svc_decode_void(struct svc_rqst *rqstp, struct xdr_stream *xdr); bool nlm4svc_decode_testargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); bool nlm4svc_decode_lockargs(struct svc_rqst *rqstp, struct xdr_stream *xdr); -- cgit v1.2.3 From 3615c78673c332b69aaacefbcde5937c5c706686 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 14 Mar 2023 13:31:02 +0100 Subject: efi: sysfb_efi: Fix DMI quirks not working for simpledrm Commit 8633ef82f101 ("drivers/firmware: consolidate EFI framebuffer setup for all arches") moved the sysfb_apply_efi_quirks() call in sysfb_init() from before the [sysfb_]parse_mode() call to after it. But sysfb_apply_efi_quirks() modifies the global screen_info struct which [sysfb_]parse_mode() parses, so doing it later is too late. This has broken all DMI based quirks for correcting wrong firmware efifb settings when simpledrm is used. To fix this move the sysfb_apply_efi_quirks() call back to its old place and split the new setup of the efifb_fwnode (which requires the platform_device) into its own function and call that at the place of the moved sysfb_apply_efi_quirks(pd) calls. Fixes: 8633ef82f101 ("drivers/firmware: consolidate EFI framebuffer setup for all arches") Cc: stable@vger.kernel.org Cc: Javier Martinez Canillas Cc: Thomas Zimmermann Signed-off-by: Hans de Goede Reviewed-by: Javier Martinez Canillas Signed-off-by: Ard Biesheuvel --- drivers/firmware/efi/sysfb_efi.c | 5 ++++- drivers/firmware/sysfb.c | 4 +++- drivers/firmware/sysfb_simplefb.c | 2 +- include/linux/sysfb.h | 9 +++++++-- 4 files changed, 15 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/firmware/efi/sysfb_efi.c b/drivers/firmware/efi/sysfb_efi.c index f06fdacc9bc8..e76d6803bdd0 100644 --- a/drivers/firmware/efi/sysfb_efi.c +++ b/drivers/firmware/efi/sysfb_efi.c @@ -341,7 +341,7 @@ static const struct fwnode_operations efifb_fwnode_ops = { #ifdef CONFIG_EFI static struct fwnode_handle efifb_fwnode; -__init void sysfb_apply_efi_quirks(struct platform_device *pd) +__init void sysfb_apply_efi_quirks(void) { if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI || !(screen_info.capabilities & VIDEO_CAPABILITY_SKIP_QUIRKS)) @@ -355,7 +355,10 @@ __init void sysfb_apply_efi_quirks(struct platform_device *pd) screen_info.lfb_height = temp; screen_info.lfb_linelength = 4 * screen_info.lfb_width; } +} +__init void sysfb_set_efifb_fwnode(struct platform_device *pd) +{ if (screen_info.orig_video_isVGA == VIDEO_TYPE_EFI && IS_ENABLED(CONFIG_PCI)) { fwnode_init(&efifb_fwnode, &efifb_fwnode_ops); pd->dev.fwnode = &efifb_fwnode; diff --git a/drivers/firmware/sysfb.c b/drivers/firmware/sysfb.c index 3fd3563d962b..3c197db42c9d 100644 --- a/drivers/firmware/sysfb.c +++ b/drivers/firmware/sysfb.c @@ -81,6 +81,8 @@ static __init int sysfb_init(void) if (disabled) goto unlock_mutex; + sysfb_apply_efi_quirks(); + /* try to create a simple-framebuffer device */ compatible = sysfb_parse_mode(si, &mode); if (compatible) { @@ -107,7 +109,7 @@ static __init int sysfb_init(void) goto unlock_mutex; } - sysfb_apply_efi_quirks(pd); + sysfb_set_efifb_fwnode(pd); ret = platform_device_add_data(pd, si, sizeof(*si)); if (ret) diff --git a/drivers/firmware/sysfb_simplefb.c b/drivers/firmware/sysfb_simplefb.c index ce9c007ed66f..82c64cb9f531 100644 --- a/drivers/firmware/sysfb_simplefb.c +++ b/drivers/firmware/sysfb_simplefb.c @@ -141,7 +141,7 @@ __init struct platform_device *sysfb_create_simplefb(const struct screen_info *s if (!pd) return ERR_PTR(-ENOMEM); - sysfb_apply_efi_quirks(pd); + sysfb_set_efifb_fwnode(pd); ret = platform_device_add_resources(pd, &res, 1); if (ret) diff --git a/include/linux/sysfb.h b/include/linux/sysfb.h index 8ba8b5be5567..c1ef5fc60a3c 100644 --- a/include/linux/sysfb.h +++ b/include/linux/sysfb.h @@ -70,11 +70,16 @@ static inline void sysfb_disable(void) #ifdef CONFIG_EFI extern struct efifb_dmi_info efifb_dmi_list[]; -void sysfb_apply_efi_quirks(struct platform_device *pd); +void sysfb_apply_efi_quirks(void); +void sysfb_set_efifb_fwnode(struct platform_device *pd); #else /* CONFIG_EFI */ -static inline void sysfb_apply_efi_quirks(struct platform_device *pd) +static inline void sysfb_apply_efi_quirks(void) +{ +} + +static inline void sysfb_set_efifb_fwnode(struct platform_device *pd) { } -- cgit v1.2.3 From 99669259f3361d759219811e670b7e0742668556 Mon Sep 17 00:00:00 2001 From: Maxime Bizon Date: Thu, 16 Mar 2023 16:33:16 -0700 Subject: net: mdio: fix owner field for mdio buses registered using device-tree Bus ownership is wrong when using of_mdiobus_register() to register an mdio bus. That function is not inline, so when it calls mdiobus_register() the wrong THIS_MODULE value is captured. Signed-off-by: Maxime Bizon Fixes: 90eff9096c01 ("net: phy: Allow splitting MDIO bus/device support from PHYs") [florian: fix kdoc, added Fixes tag] Signed-off-by: Florian Fainelli Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/mdio/of_mdio.c | 12 +++++++----- drivers/net/phy/mdio_devres.c | 11 ++++++----- include/linux/of_mdio.h | 22 +++++++++++++++++++--- 3 files changed, 32 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/drivers/net/mdio/of_mdio.c b/drivers/net/mdio/of_mdio.c index 510822d6d0d9..1e46e39f5f46 100644 --- a/drivers/net/mdio/of_mdio.c +++ b/drivers/net/mdio/of_mdio.c @@ -139,21 +139,23 @@ bool of_mdiobus_child_is_phy(struct device_node *child) EXPORT_SYMBOL(of_mdiobus_child_is_phy); /** - * of_mdiobus_register - Register mii_bus and create PHYs from the device tree + * __of_mdiobus_register - Register mii_bus and create PHYs from the device tree * @mdio: pointer to mii_bus structure * @np: pointer to device_node of MDIO bus. + * @owner: module owning the @mdio object. * * This function registers the mii_bus structure and registers a phy_device * for each child node of @np. */ -int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) +int __of_mdiobus_register(struct mii_bus *mdio, struct device_node *np, + struct module *owner) { struct device_node *child; bool scanphys = false; int addr, rc; if (!np) - return mdiobus_register(mdio); + return __mdiobus_register(mdio, owner); /* Do not continue if the node is disabled */ if (!of_device_is_available(np)) @@ -172,7 +174,7 @@ int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) of_property_read_u32(np, "reset-post-delay-us", &mdio->reset_post_delay_us); /* Register the MDIO bus */ - rc = mdiobus_register(mdio); + rc = __mdiobus_register(mdio, owner); if (rc) return rc; @@ -236,7 +238,7 @@ unregister: mdiobus_unregister(mdio); return rc; } -EXPORT_SYMBOL(of_mdiobus_register); +EXPORT_SYMBOL(__of_mdiobus_register); /** * of_mdio_find_device - Given a device tree node, find the mdio_device diff --git a/drivers/net/phy/mdio_devres.c b/drivers/net/phy/mdio_devres.c index b560e99695df..69b829e6ab35 100644 --- a/drivers/net/phy/mdio_devres.c +++ b/drivers/net/phy/mdio_devres.c @@ -98,13 +98,14 @@ EXPORT_SYMBOL(__devm_mdiobus_register); #if IS_ENABLED(CONFIG_OF_MDIO) /** - * devm_of_mdiobus_register - Resource managed variant of of_mdiobus_register() + * __devm_of_mdiobus_register - Resource managed variant of of_mdiobus_register() * @dev: Device to register mii_bus for * @mdio: MII bus structure to register * @np: Device node to parse + * @owner: Owning module */ -int devm_of_mdiobus_register(struct device *dev, struct mii_bus *mdio, - struct device_node *np) +int __devm_of_mdiobus_register(struct device *dev, struct mii_bus *mdio, + struct device_node *np, struct module *owner) { struct mdiobus_devres *dr; int ret; @@ -117,7 +118,7 @@ int devm_of_mdiobus_register(struct device *dev, struct mii_bus *mdio, if (!dr) return -ENOMEM; - ret = of_mdiobus_register(mdio, np); + ret = __of_mdiobus_register(mdio, np, owner); if (ret) { devres_free(dr); return ret; @@ -127,7 +128,7 @@ int devm_of_mdiobus_register(struct device *dev, struct mii_bus *mdio, devres_add(dev, dr); return 0; } -EXPORT_SYMBOL(devm_of_mdiobus_register); +EXPORT_SYMBOL(__devm_of_mdiobus_register); #endif /* CONFIG_OF_MDIO */ MODULE_LICENSE("GPL"); diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index da633d34ab86..8a52ef2e6fa6 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -14,9 +14,25 @@ #if IS_ENABLED(CONFIG_OF_MDIO) bool of_mdiobus_child_is_phy(struct device_node *child); -int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np); -int devm_of_mdiobus_register(struct device *dev, struct mii_bus *mdio, - struct device_node *np); +int __of_mdiobus_register(struct mii_bus *mdio, struct device_node *np, + struct module *owner); + +static inline int of_mdiobus_register(struct mii_bus *mdio, + struct device_node *np) +{ + return __of_mdiobus_register(mdio, np, THIS_MODULE); +} + +int __devm_of_mdiobus_register(struct device *dev, struct mii_bus *mdio, + struct device_node *np, struct module *owner); + +static inline int devm_of_mdiobus_register(struct device *dev, + struct mii_bus *mdio, + struct device_node *np) +{ + return __devm_of_mdiobus_register(dev, mdio, np, THIS_MODULE); +} + struct mdio_device *of_mdio_find_device(struct device_node *np); struct phy_device *of_phy_find_device(struct device_node *phy_np); struct phy_device * -- cgit v1.2.3 From 30b605b8501e321f79e19c3238aa6ca31da6087c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 16 Mar 2023 16:33:17 -0700 Subject: net: mdio: fix owner field for mdio buses registered using ACPI Bus ownership is wrong when using acpi_mdiobus_register() to register an mdio bus. That function is not inline, so when it calls mdiobus_register() the wrong THIS_MODULE value is captured. CC: Maxime Bizon Fixes: 803ca24d2f92 ("net: mdio: Add ACPI support code for mdio") Signed-off-by: Florian Fainelli Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/mdio/acpi_mdio.c | 10 ++++++---- include/linux/acpi_mdio.h | 9 ++++++++- 2 files changed, 14 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/net/mdio/acpi_mdio.c b/drivers/net/mdio/acpi_mdio.c index d77c987fda9c..4630dde01974 100644 --- a/drivers/net/mdio/acpi_mdio.c +++ b/drivers/net/mdio/acpi_mdio.c @@ -18,16 +18,18 @@ MODULE_AUTHOR("Calvin Johnson "); MODULE_LICENSE("GPL"); /** - * acpi_mdiobus_register - Register mii_bus and create PHYs from the ACPI ASL. + * __acpi_mdiobus_register - Register mii_bus and create PHYs from the ACPI ASL. * @mdio: pointer to mii_bus structure * @fwnode: pointer to fwnode of MDIO bus. This fwnode is expected to represent + * @owner: module owning this @mdio object. * an ACPI device object corresponding to the MDIO bus and its children are * expected to correspond to the PHY devices on that bus. * * This function registers the mii_bus structure and registers a phy_device * for each child node of @fwnode. */ -int acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *fwnode) +int __acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *fwnode, + struct module *owner) { struct fwnode_handle *child; u32 addr; @@ -35,7 +37,7 @@ int acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *fwnode) /* Mask out all PHYs from auto probing. */ mdio->phy_mask = GENMASK(31, 0); - ret = mdiobus_register(mdio); + ret = __mdiobus_register(mdio, owner); if (ret) return ret; @@ -55,4 +57,4 @@ int acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *fwnode) } return 0; } -EXPORT_SYMBOL(acpi_mdiobus_register); +EXPORT_SYMBOL(__acpi_mdiobus_register); diff --git a/include/linux/acpi_mdio.h b/include/linux/acpi_mdio.h index 0a24ab7cb66f..8e2eefa9fbc0 100644 --- a/include/linux/acpi_mdio.h +++ b/include/linux/acpi_mdio.h @@ -9,7 +9,14 @@ #include #if IS_ENABLED(CONFIG_ACPI_MDIO) -int acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *fwnode); +int __acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *fwnode, + struct module *owner); + +static inline int +acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *handle) +{ + return __acpi_mdiobus_register(mdio, handle, THIS_MODULE); +} #else /* CONFIG_ACPI_MDIO */ static inline int acpi_mdiobus_register(struct mii_bus *mdio, struct fwnode_handle *fwnode) -- cgit v1.2.3 From 070246e4674b125860d311c18ce2623e73e2bd51 Mon Sep 17 00:00:00 2001 From: Jochen Henneberg Date: Fri, 17 Mar 2023 09:08:17 +0100 Subject: net: stmmac: Fix for mismatched host/device DMA address width Currently DMA address width is either read from a RO device register or force set from the platform data. This breaks DMA when the host DMA address width is <=32it but the device is >32bit. Right now the driver may decide to use a 2nd DMA descriptor for another buffer (happens in case of TSO xmit) assuming that 32bit addressing is used due to platform configuration but the device will still use both descriptor addresses as one address. This can be observed with the Intel EHL platform driver that sets 32bit for addr64 but the MAC reports 40bit. The TX queue gets stuck in case of TCP with iptables NAT configuration on TSO packets. The logic should be like this: Whatever we do on the host side (memory allocation GFP flags) should happen with the host DMA width, whenever we decide how to set addresses on the device registers we must use the device DMA address width. This patch renames the platform address width field from addr64 (term used in device datasheet) to host_addr and uses this value exclusively for host side operations while all chip operations consider the device DMA width as read from the device register. Fixes: 7cfc4486e7ea ("stmmac: intel: Configure EHL PSE0 GbE and PSE1 GbE to 32 bits DMA addressing") Signed-off-by: Jochen Henneberg Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/common.h | 1 + drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c | 2 +- drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 4 +-- .../net/ethernet/stmicro/stmmac/dwmac-mediatek.c | 2 +- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 30 ++++++++++++---------- include/linux/stmmac.h | 2 +- 6 files changed, 22 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 6b5d96bced47..ec9c130276d8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -418,6 +418,7 @@ struct dma_features { unsigned int frpbs; unsigned int frpes; unsigned int addr64; + unsigned int host_dma_width; unsigned int rssen; unsigned int vlhash; unsigned int sphen; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c index ac550d1ac015..2a2be65d65a0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c @@ -288,7 +288,7 @@ static int imx_dwmac_probe(struct platform_device *pdev) goto err_parse_dt; } - plat_dat->addr64 = dwmac->ops->addr_width; + plat_dat->host_dma_width = dwmac->ops->addr_width; plat_dat->init = imx_dwmac_init; plat_dat->exit = imx_dwmac_exit; plat_dat->clks_config = imx_dwmac_clks_config; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 7deb1f817dac..13aa919633b4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -684,7 +684,7 @@ static int ehl_pse0_common_data(struct pci_dev *pdev, intel_priv->is_pse = true; plat->bus_id = 2; - plat->addr64 = 32; + plat->host_dma_width = 32; plat->clk_ptp_rate = 200000000; @@ -725,7 +725,7 @@ static int ehl_pse1_common_data(struct pci_dev *pdev, intel_priv->is_pse = true; plat->bus_id = 3; - plat->addr64 = 32; + plat->host_dma_width = 32; plat->clk_ptp_rate = 200000000; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c index 2f7d8e4561d9..9ae31e3dc821 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c @@ -591,7 +591,7 @@ static int mediatek_dwmac_common_data(struct platform_device *pdev, plat->use_phy_wol = priv_plat->mac_wol ? 0 : 1; plat->riwt_off = 1; plat->maxmtu = ETH_DATA_LEN; - plat->addr64 = priv_plat->variant->dma_bit_mask; + plat->host_dma_width = priv_plat->variant->dma_bit_mask; plat->bsp_priv = priv_plat; plat->init = mediatek_dwmac_init; plat->clks_config = mediatek_dwmac_clks_config; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 8f543c3ab5c5..17310ade88dd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1431,7 +1431,7 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i]; gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); - if (priv->dma_cap.addr64 <= 32) + if (priv->dma_cap.host_dma_width <= 32) gfp |= GFP_DMA32; if (!buf->page) { @@ -4587,7 +4587,7 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue) unsigned int entry = rx_q->dirty_rx; gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); - if (priv->dma_cap.addr64 <= 32) + if (priv->dma_cap.host_dma_width <= 32) gfp |= GFP_DMA32; while (dirty-- > 0) { @@ -6205,7 +6205,7 @@ static int stmmac_dma_cap_show(struct seq_file *seq, void *v) seq_printf(seq, "\tFlexible RX Parser: %s\n", priv->dma_cap.frpsel ? "Y" : "N"); seq_printf(seq, "\tEnhanced Addressing: %d\n", - priv->dma_cap.addr64); + priv->dma_cap.host_dma_width); seq_printf(seq, "\tReceive Side Scaling: %s\n", priv->dma_cap.rssen ? "Y" : "N"); seq_printf(seq, "\tVLAN Hash Filtering: %s\n", @@ -7178,20 +7178,22 @@ int stmmac_dvr_probe(struct device *device, dev_info(priv->device, "SPH feature enabled\n"); } - /* The current IP register MAC_HW_Feature1[ADDR64] only define - * 32/40/64 bit width, but some SOC support others like i.MX8MP - * support 34 bits but it map to 40 bits width in MAC_HW_Feature1[ADDR64]. - * So overwrite dma_cap.addr64 according to HW real design. + /* Ideally our host DMA address width is the same as for the + * device. However, it may differ and then we have to use our + * host DMA width for allocation and the device DMA width for + * register handling. */ - if (priv->plat->addr64) - priv->dma_cap.addr64 = priv->plat->addr64; + if (priv->plat->host_dma_width) + priv->dma_cap.host_dma_width = priv->plat->host_dma_width; + else + priv->dma_cap.host_dma_width = priv->dma_cap.addr64; - if (priv->dma_cap.addr64) { + if (priv->dma_cap.host_dma_width) { ret = dma_set_mask_and_coherent(device, - DMA_BIT_MASK(priv->dma_cap.addr64)); + DMA_BIT_MASK(priv->dma_cap.host_dma_width)); if (!ret) { - dev_info(priv->device, "Using %d bits DMA width\n", - priv->dma_cap.addr64); + dev_info(priv->device, "Using %d/%d bits DMA host/device width\n", + priv->dma_cap.host_dma_width, priv->dma_cap.addr64); /* * If more than 32 bits can be addressed, make sure to @@ -7206,7 +7208,7 @@ int stmmac_dvr_probe(struct device *device, goto error_hw_init; } - priv->dma_cap.addr64 = 32; + priv->dma_cap.host_dma_width = 32; } } diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index a152678b82b7..a2414c187483 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -215,7 +215,7 @@ struct plat_stmmacenet_data { int unicast_filter_entries; int tx_fifo_size; int rx_fifo_size; - u32 addr64; + u32 host_dma_width; u32 rx_queues_to_use; u32 tx_queues_to_use; u8 rx_sched_algorithm; -- cgit v1.2.3 From 1470afefc3c42df5d1662f87d079b46651bdc95b Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 15 Mar 2023 17:31:02 -0700 Subject: cpumask: introduce for_each_cpu_or Equivalent of for_each_cpu_and, except it ORs the two masks together so it iterates all the CPUs present in either mask. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- include/linux/cpumask.h | 17 +++++++++++++++++ include/linux/find.h | 37 +++++++++++++++++++++++++++++++++++++ lib/find_bit.c | 9 +++++++++ 3 files changed, 63 insertions(+) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 8fbe76607965..220974ef1bf5 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -350,6 +350,23 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta #define for_each_cpu_andnot(cpu, mask1, mask2) \ for_each_andnot_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), small_cpumask_bits) +/** + * for_each_cpu_or - iterate over every cpu present in either mask + * @cpu: the (optionally unsigned) integer iterator + * @mask1: the first cpumask pointer + * @mask2: the second cpumask pointer + * + * This saves a temporary CPU mask in many places. It is equivalent to: + * struct cpumask tmp; + * cpumask_or(&tmp, &mask1, &mask2); + * for_each_cpu(cpu, &tmp) + * ... + * + * After the loop, cpu is >= nr_cpu_ids. + */ +#define for_each_cpu_or(cpu, mask1, mask2) \ + for_each_or_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), small_cpumask_bits) + /** * cpumask_any_but - return a "random" in a cpumask, but not this one. * @mask: the cpumask to search diff --git a/include/linux/find.h b/include/linux/find.h index 4647864a5ffd..5e4f39ef2e72 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -14,6 +14,8 @@ unsigned long _find_next_and_bit(const unsigned long *addr1, const unsigned long unsigned long nbits, unsigned long start); unsigned long _find_next_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long nbits, unsigned long start); +unsigned long _find_next_or_bit(const unsigned long *addr1, const unsigned long *addr2, + unsigned long nbits, unsigned long start); unsigned long _find_next_zero_bit(const unsigned long *addr, unsigned long nbits, unsigned long start); extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size); @@ -127,6 +129,36 @@ unsigned long find_next_andnot_bit(const unsigned long *addr1, } #endif +#ifndef find_next_or_bit +/** + * find_next_or_bit - find the next set bit in either memory regions + * @addr1: The first address to base the search on + * @addr2: The second address to base the search on + * @size: The bitmap size in bits + * @offset: The bitnumber to start searching at + * + * Returns the bit number for the next set bit + * If no bits are set, returns @size. + */ +static inline +unsigned long find_next_or_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long size, + unsigned long offset) +{ + if (small_const_nbits(size)) { + unsigned long val; + + if (unlikely(offset >= size)) + return size; + + val = (*addr1 | *addr2) & GENMASK(size - 1, offset); + return val ? __ffs(val) : size; + } + + return _find_next_or_bit(addr1, addr2, size, offset); +} +#endif + #ifndef find_next_zero_bit /** * find_next_zero_bit - find the next cleared bit in a memory region @@ -536,6 +568,11 @@ unsigned long find_next_bit_le(const void *addr, unsigned (bit) = find_next_andnot_bit((addr1), (addr2), (size), (bit)), (bit) < (size);\ (bit)++) +#define for_each_or_bit(bit, addr1, addr2, size) \ + for ((bit) = 0; \ + (bit) = find_next_or_bit((addr1), (addr2), (size), (bit)), (bit) < (size);\ + (bit)++) + /* same as for_each_set_bit() but use bit as value to start with */ #define for_each_set_bit_from(bit, addr, size) \ for (; (bit) = find_next_bit((addr), (size), (bit)), (bit) < (size); (bit)++) diff --git a/lib/find_bit.c b/lib/find_bit.c index c10920e66788..32f99e9a670e 100644 --- a/lib/find_bit.c +++ b/lib/find_bit.c @@ -182,6 +182,15 @@ unsigned long _find_next_andnot_bit(const unsigned long *addr1, const unsigned l EXPORT_SYMBOL(_find_next_andnot_bit); #endif +#ifndef find_next_or_bit +unsigned long _find_next_or_bit(const unsigned long *addr1, const unsigned long *addr2, + unsigned long nbits, unsigned long start) +{ + return FIND_NEXT_BIT(addr1[idx] | addr2[idx], /* nop */, nbits, start); +} +EXPORT_SYMBOL(_find_next_or_bit); +#endif + #ifndef find_next_zero_bit unsigned long _find_next_zero_bit(const unsigned long *addr, unsigned long nbits, unsigned long start) -- cgit v1.2.3 From e9b60c7f97130795c7aa81a649ae4b93a172a277 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Wed, 15 Mar 2023 17:31:03 -0700 Subject: pcpcntr: remove percpu_counter_sum_all() percpu_counter_sum_all() is now redundant as the race condition it was invented to handle is now dealt with by percpu_counter_sum() directly and all users of percpu_counter_sum_all() have been removed. Remove it. This effectively reverts the changes made in f689054aace2 ("percpu_counter: add percpu_counter_sum_all interface") except for the cpumask iteration that fixes percpu_counter_sum() made earlier in this series. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- include/linux/percpu_counter.h | 6 ------ lib/percpu_counter.c | 40 +++++++++++----------------------------- 2 files changed, 11 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 521a733e21a9..75b73c83bc9d 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -45,7 +45,6 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount); void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch); s64 __percpu_counter_sum(struct percpu_counter *fbc); -s64 percpu_counter_sum_all(struct percpu_counter *fbc); int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch); void percpu_counter_sync(struct percpu_counter *fbc); @@ -196,11 +195,6 @@ static inline s64 percpu_counter_sum(struct percpu_counter *fbc) return percpu_counter_read(fbc); } -static inline s64 percpu_counter_sum_all(struct percpu_counter *fbc) -{ - return percpu_counter_read(fbc); -} - static inline bool percpu_counter_initialized(struct percpu_counter *fbc) { return true; diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 0e096311e0c0..5004463c4f9f 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -122,23 +122,6 @@ void percpu_counter_sync(struct percpu_counter *fbc) } EXPORT_SYMBOL(percpu_counter_sync); -static s64 __percpu_counter_sum_mask(struct percpu_counter *fbc, - const struct cpumask *cpu_mask) -{ - s64 ret; - int cpu; - unsigned long flags; - - raw_spin_lock_irqsave(&fbc->lock, flags); - ret = fbc->count; - for_each_cpu_or(cpu, cpu_online_mask, cpu_mask) { - s32 *pcount = per_cpu_ptr(fbc->counters, cpu); - ret += *pcount; - } - raw_spin_unlock_irqrestore(&fbc->lock, flags); - return ret; -} - /* * Add up all the per-cpu counts, return the result. This is a more accurate * but much slower version of percpu_counter_read_positive(). @@ -153,22 +136,21 @@ static s64 __percpu_counter_sum_mask(struct percpu_counter *fbc, */ s64 __percpu_counter_sum(struct percpu_counter *fbc) { + s64 ret; + int cpu; + unsigned long flags; - return __percpu_counter_sum_mask(fbc, cpu_dying_mask); + raw_spin_lock_irqsave(&fbc->lock, flags); + ret = fbc->count; + for_each_cpu_or(cpu, cpu_online_mask, cpu_dying_mask) { + s32 *pcount = per_cpu_ptr(fbc->counters, cpu); + ret += *pcount; + } + raw_spin_unlock_irqrestore(&fbc->lock, flags); + return ret; } EXPORT_SYMBOL(__percpu_counter_sum); -/* - * This is slower version of percpu_counter_sum as it traverses all possible - * cpus. Use this only in the cases where accurate data is needed in the - * presense of CPUs getting offlined. - */ -s64 percpu_counter_sum_all(struct percpu_counter *fbc) -{ - return __percpu_counter_sum_mask(fbc, cpu_possible_mask); -} -EXPORT_SYMBOL(percpu_counter_sum_all); - int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp, struct lock_class_key *key) { -- cgit v1.2.3 From 9d2789ac9d60c049d26ef6d3005d9c94c5a559e9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 20 Mar 2023 20:01:25 -0600 Subject: block/io_uring: pass in issue_flags for uring_cmd task_work handling io_uring_cmd_done() currently assumes that the uring_lock is held when invoked, and while it generally is, this is not guaranteed. Pass in the issue_flags associated with it, so that we have IO_URING_F_UNLOCKED available to be able to lock the CQ ring appropriately when completing events. Cc: stable@vger.kernel.org Fixes: ee692a21e9bf ("fs,io_uring: add infrastructure for uring-cmd") Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 31 ++++++++++++++++++------------- drivers/nvme/host/ioctl.c | 14 ++++++++------ include/linux/io_uring.h | 11 ++++++----- io_uring/uring_cmd.c | 10 ++++++---- 4 files changed, 38 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index fb5a557afde8..c73cc57ec547 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -715,7 +715,8 @@ static void __ublk_fail_req(struct ublk_queue *ubq, struct ublk_io *io, } } -static void ubq_complete_io_cmd(struct ublk_io *io, int res) +static void ubq_complete_io_cmd(struct ublk_io *io, int res, + unsigned issue_flags) { /* mark this cmd owned by ublksrv */ io->flags |= UBLK_IO_FLAG_OWNED_BY_SRV; @@ -727,7 +728,7 @@ static void ubq_complete_io_cmd(struct ublk_io *io, int res) io->flags &= ~UBLK_IO_FLAG_ACTIVE; /* tell ublksrv one io request is coming */ - io_uring_cmd_done(io->cmd, res, 0); + io_uring_cmd_done(io->cmd, res, 0, issue_flags); } #define UBLK_REQUEUE_DELAY_MS 3 @@ -744,7 +745,8 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq, mod_delayed_work(system_wq, &ubq->dev->monitor_work, 0); } -static inline void __ublk_rq_task_work(struct request *req) +static inline void __ublk_rq_task_work(struct request *req, + unsigned issue_flags) { struct ublk_queue *ubq = req->mq_hctx->driver_data; int tag = req->tag; @@ -782,7 +784,7 @@ static inline void __ublk_rq_task_work(struct request *req) pr_devel("%s: need get data. op %d, qid %d tag %d io_flags %x\n", __func__, io->cmd->cmd_op, ubq->q_id, req->tag, io->flags); - ubq_complete_io_cmd(io, UBLK_IO_RES_NEED_GET_DATA); + ubq_complete_io_cmd(io, UBLK_IO_RES_NEED_GET_DATA, issue_flags); return; } /* @@ -820,17 +822,18 @@ static inline void __ublk_rq_task_work(struct request *req) mapped_bytes >> 9; } - ubq_complete_io_cmd(io, UBLK_IO_RES_OK); + ubq_complete_io_cmd(io, UBLK_IO_RES_OK, issue_flags); } -static inline void ublk_forward_io_cmds(struct ublk_queue *ubq) +static inline void ublk_forward_io_cmds(struct ublk_queue *ubq, + unsigned issue_flags) { struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds); struct ublk_rq_data *data, *tmp; io_cmds = llist_reverse_order(io_cmds); llist_for_each_entry_safe(data, tmp, io_cmds, node) - __ublk_rq_task_work(blk_mq_rq_from_pdu(data)); + __ublk_rq_task_work(blk_mq_rq_from_pdu(data), issue_flags); } static inline void ublk_abort_io_cmds(struct ublk_queue *ubq) @@ -842,12 +845,12 @@ static inline void ublk_abort_io_cmds(struct ublk_queue *ubq) __ublk_abort_rq(ubq, blk_mq_rq_from_pdu(data)); } -static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd) +static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, unsigned issue_flags) { struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); struct ublk_queue *ubq = pdu->ubq; - ublk_forward_io_cmds(ubq); + ublk_forward_io_cmds(ubq, issue_flags); } static void ublk_rq_task_work_fn(struct callback_head *work) @@ -856,8 +859,9 @@ static void ublk_rq_task_work_fn(struct callback_head *work) struct ublk_rq_data, work); struct request *req = blk_mq_rq_from_pdu(data); struct ublk_queue *ubq = req->mq_hctx->driver_data; + unsigned issue_flags = IO_URING_F_UNLOCKED; - ublk_forward_io_cmds(ubq); + ublk_forward_io_cmds(ubq, issue_flags); } static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq) @@ -1111,7 +1115,8 @@ static void ublk_cancel_queue(struct ublk_queue *ubq) struct ublk_io *io = &ubq->ios[i]; if (io->flags & UBLK_IO_FLAG_ACTIVE) - io_uring_cmd_done(io->cmd, UBLK_IO_RES_ABORT, 0); + io_uring_cmd_done(io->cmd, UBLK_IO_RES_ABORT, 0, + IO_URING_F_UNLOCKED); } /* all io commands are canceled */ @@ -1351,7 +1356,7 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) return -EIOCBQUEUED; out: - io_uring_cmd_done(cmd, ret, 0); + io_uring_cmd_done(cmd, ret, 0, issue_flags); pr_devel("%s: complete: cmd op %d, tag %d ret %x io_flags %x\n", __func__, cmd_op, tag, ret, io->flags); return -EIOCBQUEUED; @@ -2234,7 +2239,7 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd, if (ub) ublk_put_device(ub); out: - io_uring_cmd_done(cmd, ret, 0); + io_uring_cmd_done(cmd, ret, 0, issue_flags); pr_devel("%s: cmd done ret %d cmd_op %x, dev id %d qid %d\n", __func__, ret, cmd->cmd_op, header->dev_id, header->queue_id); return -EIOCBQUEUED; diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 723e7d5b778f..d24ea2e05156 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -464,7 +464,8 @@ static inline struct nvme_uring_cmd_pdu *nvme_uring_cmd_pdu( return (struct nvme_uring_cmd_pdu *)&ioucmd->pdu; } -static void nvme_uring_task_meta_cb(struct io_uring_cmd *ioucmd) +static void nvme_uring_task_meta_cb(struct io_uring_cmd *ioucmd, + unsigned issue_flags) { struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); struct request *req = pdu->req; @@ -485,17 +486,18 @@ static void nvme_uring_task_meta_cb(struct io_uring_cmd *ioucmd) blk_rq_unmap_user(req->bio); blk_mq_free_request(req); - io_uring_cmd_done(ioucmd, status, result); + io_uring_cmd_done(ioucmd, status, result, issue_flags); } -static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd) +static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd, + unsigned issue_flags) { struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); if (pdu->bio) blk_rq_unmap_user(pdu->bio); - io_uring_cmd_done(ioucmd, pdu->nvme_status, pdu->u.result); + io_uring_cmd_done(ioucmd, pdu->nvme_status, pdu->u.result, issue_flags); } static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req, @@ -517,7 +519,7 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req, * Otherwise, move the completion to task work. */ if (cookie != NULL && blk_rq_is_poll(req)) - nvme_uring_task_cb(ioucmd); + nvme_uring_task_cb(ioucmd, IO_URING_F_UNLOCKED); else io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_cb); @@ -539,7 +541,7 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io_meta(struct request *req, * Otherwise, move the completion to task work. */ if (cookie != NULL && blk_rq_is_poll(req)) - nvme_uring_task_meta_cb(ioucmd); + nvme_uring_task_meta_cb(ioucmd, IO_URING_F_UNLOCKED); else io_uring_cmd_complete_in_task(ioucmd, nvme_uring_task_meta_cb); diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 934e5dd4ccc0..35b9328ca335 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -27,7 +27,7 @@ struct io_uring_cmd { const void *cmd; union { /* callback to defer completions to task context */ - void (*task_work_cb)(struct io_uring_cmd *cmd); + void (*task_work_cb)(struct io_uring_cmd *cmd, unsigned); /* used for polled completion */ void *cookie; }; @@ -39,9 +39,10 @@ struct io_uring_cmd { #if defined(CONFIG_IO_URING) int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, struct iov_iter *iter, void *ioucmd); -void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2); +void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2, + unsigned issue_flags); void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, - void (*task_work_cb)(struct io_uring_cmd *)); + void (*task_work_cb)(struct io_uring_cmd *, unsigned)); struct sock *io_uring_get_socket(struct file *file); void __io_uring_cancel(bool cancel_all); void __io_uring_free(struct task_struct *tsk); @@ -72,11 +73,11 @@ static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw, return -EOPNOTSUPP; } static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, - ssize_t ret2) + ssize_t ret2, unsigned issue_flags) { } static inline void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, - void (*task_work_cb)(struct io_uring_cmd *)) + void (*task_work_cb)(struct io_uring_cmd *, unsigned)) { } static inline struct sock *io_uring_get_socket(struct file *file) diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index 446a189b78b0..e535e8db01e3 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -15,12 +15,13 @@ static void io_uring_cmd_work(struct io_kiocb *req, bool *locked) { struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd); + unsigned issue_flags = *locked ? 0 : IO_URING_F_UNLOCKED; - ioucmd->task_work_cb(ioucmd); + ioucmd->task_work_cb(ioucmd, issue_flags); } void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, - void (*task_work_cb)(struct io_uring_cmd *)) + void (*task_work_cb)(struct io_uring_cmd *, unsigned)) { struct io_kiocb *req = cmd_to_io_kiocb(ioucmd); @@ -42,7 +43,8 @@ static inline void io_req_set_cqe32_extra(struct io_kiocb *req, * Called by consumers of io_uring_cmd, if they originally returned * -EIOCBQUEUED upon receiving the command. */ -void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2) +void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2, + unsigned issue_flags) { struct io_kiocb *req = cmd_to_io_kiocb(ioucmd); @@ -56,7 +58,7 @@ void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2) /* order with io_iopoll_req_issued() checking ->iopoll_complete */ smp_store_release(&req->iopoll_completed, 1); else - io_req_complete_post(req, 0); + io_req_complete_post(req, issue_flags); } EXPORT_SYMBOL_GPL(io_uring_cmd_done); -- cgit v1.2.3 From f87d28673b71b35b248231a2086f9404afbb7f28 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Sat, 25 Feb 2023 16:01:36 -0800 Subject: entry: Fix noinstr warning in __enter_from_user_mode() __enter_from_user_mode() is triggering noinstr warnings with CONFIG_DEBUG_PREEMPT due to its call of preempt_count_add() via ct_state(). The preemption disable isn't needed as interrupts are already disabled. And the context_tracking_enabled() check in ct_state() also isn't needed as that's already being done by the CT_WARN_ON(). Just use __ct_state() instead. Fixes the following warnings: vmlinux.o: warning: objtool: enter_from_user_mode+0xba: call to preempt_count_add() leaves .noinstr.text section vmlinux.o: warning: objtool: syscall_enter_from_user_mode+0xf9: call to preempt_count_add() leaves .noinstr.text section vmlinux.o: warning: objtool: syscall_enter_from_user_mode_prepare+0xc7: call to preempt_count_add() leaves .noinstr.text section vmlinux.o: warning: objtool: irqentry_enter_from_user_mode+0xba: call to preempt_count_add() leaves .noinstr.text section Fixes: 171476775d32 ("context_tracking: Convert state to atomic_t") Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/d8955fa6d68dc955dda19baf13ae014ae27926f5.1677369694.git.jpoimboe@kernel.org --- include/linux/context_tracking.h | 1 + include/linux/context_tracking_state.h | 2 ++ kernel/entry/common.c | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index d4afa8508a80..3a7909ed5498 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -96,6 +96,7 @@ static inline void user_exit_irqoff(void) { } static inline int exception_enter(void) { return 0; } static inline void exception_exit(enum ctx_state prev_ctx) { } static inline int ct_state(void) { return -1; } +static inline int __ct_state(void) { return -1; } static __always_inline bool context_tracking_guest_enter(void) { return false; } static inline void context_tracking_guest_exit(void) { } #define CT_WARN_ON(cond) do { } while (0) diff --git a/include/linux/context_tracking_state.h b/include/linux/context_tracking_state.h index 4a4d56f77180..fdd537ea513f 100644 --- a/include/linux/context_tracking_state.h +++ b/include/linux/context_tracking_state.h @@ -46,7 +46,9 @@ struct context_tracking { #ifdef CONFIG_CONTEXT_TRACKING DECLARE_PER_CPU(struct context_tracking, context_tracking); +#endif +#ifdef CONFIG_CONTEXT_TRACKING_USER static __always_inline int __ct_state(void) { return arch_atomic_read(this_cpu_ptr(&context_tracking.state)) & CT_STATE_MASK; diff --git a/kernel/entry/common.c b/kernel/entry/common.c index 846add8394c4..1314894d2efa 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -21,7 +21,7 @@ static __always_inline void __enter_from_user_mode(struct pt_regs *regs) arch_enter_from_user_mode(regs); lockdep_hardirqs_off(CALLER_ADDR0); - CT_WARN_ON(ct_state() != CONTEXT_USER); + CT_WARN_ON(__ct_state() != CONTEXT_USER); user_exit_irqoff(); instrumentation_begin(); -- cgit v1.2.3 From fbaa38214cd9e150764ccaa82e04ecf42cc1140c Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 11 Mar 2023 15:40:01 +0100 Subject: cxl/pci: Fix CDAT retrieval on big endian The CDAT exposed in sysfs differs between little endian and big endian arches: On big endian, every 4 bytes are byte-swapped. PCI Configuration Space is little endian (PCI r3.0 sec 6.1). Accessors such as pci_read_config_dword() implicitly swap bytes on big endian. That way, the macros in include/uapi/linux/pci_regs.h work regardless of the arch's endianness. For an example of implicit byte-swapping, see ppc4xx_pciex_read_config(), which calls in_le32(), which uses lwbrx (Load Word Byte-Reverse Indexed). DOE Read/Write Data Mailbox Registers are unlike other registers in Configuration Space in that they contain or receive a 4 byte portion of an opaque byte stream (a "Data Object" per PCIe r6.0 sec 7.9.24.5f). They need to be copied to or from the request/response buffer verbatim. So amend pci_doe_send_req() and pci_doe_recv_resp() to undo the implicit byte-swapping. The CXL_DOE_TABLE_ACCESS_* and PCI_DOE_DATA_OBJECT_DISC_* macros assume implicit byte-swapping. Byte-swap requests after constructing them with those macros and byte-swap responses before parsing them. Change the request and response type to __le32 to avoid sparse warnings. Per a request from Jonathan, replace sizeof(u32) with sizeof(__le32) for consistency. Fixes: c97006046c79 ("cxl/port: Read CDAT table") Tested-by: Ira Weiny Signed-off-by: Lukas Wunner Reviewed-by: Dan Williams Cc: stable@vger.kernel.org # v6.0+ Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/3051114102f41d19df3debbee123129118fc5e6d.1678543498.git.lukas@wunner.de Signed-off-by: Dan Williams --- drivers/cxl/core/pci.c | 26 +++++++++++++------------- drivers/pci/doe.c | 25 ++++++++++++++----------- include/linux/pci-doe.h | 8 ++++++-- 3 files changed, 33 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 7328a2552411..49a99a84b6aa 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -462,7 +462,7 @@ static struct pci_doe_mb *find_cdat_doe(struct device *uport) return NULL; } -#define CDAT_DOE_REQ(entry_handle) \ +#define CDAT_DOE_REQ(entry_handle) cpu_to_le32 \ (FIELD_PREP(CXL_DOE_TABLE_ACCESS_REQ_CODE, \ CXL_DOE_TABLE_ACCESS_REQ_CODE_READ) | \ FIELD_PREP(CXL_DOE_TABLE_ACCESS_TABLE_TYPE, \ @@ -475,8 +475,8 @@ static void cxl_doe_task_complete(struct pci_doe_task *task) } struct cdat_doe_task { - u32 request_pl; - u32 response_pl[32]; + __le32 request_pl; + __le32 response_pl[32]; struct completion c; struct pci_doe_task task; }; @@ -510,10 +510,10 @@ static int cxl_cdat_get_length(struct device *dev, return rc; } wait_for_completion(&t.c); - if (t.task.rv < sizeof(u32)) + if (t.task.rv < sizeof(__le32)) return -EIO; - *length = t.response_pl[1]; + *length = le32_to_cpu(t.response_pl[1]); dev_dbg(dev, "CDAT length %zu\n", *length); return 0; @@ -524,13 +524,13 @@ static int cxl_cdat_read_table(struct device *dev, struct cxl_cdat *cdat) { size_t length = cdat->length; - u32 *data = cdat->table; + __le32 *data = cdat->table; int entry_handle = 0; do { DECLARE_CDAT_DOE_TASK(CDAT_DOE_REQ(entry_handle), t); size_t entry_dw; - u32 *entry; + __le32 *entry; int rc; rc = pci_doe_submit_task(cdat_doe, &t.task); @@ -540,21 +540,21 @@ static int cxl_cdat_read_table(struct device *dev, } wait_for_completion(&t.c); /* 1 DW header + 1 DW data min */ - if (t.task.rv < (2 * sizeof(u32))) + if (t.task.rv < (2 * sizeof(__le32))) return -EIO; /* Get the CXL table access header entry handle */ entry_handle = FIELD_GET(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE, - t.response_pl[0]); + le32_to_cpu(t.response_pl[0])); entry = t.response_pl + 1; - entry_dw = t.task.rv / sizeof(u32); + entry_dw = t.task.rv / sizeof(__le32); /* Skip Header */ entry_dw -= 1; - entry_dw = min(length / sizeof(u32), entry_dw); + entry_dw = min(length / sizeof(__le32), entry_dw); /* Prevent length < 1 DW from causing a buffer overflow */ if (entry_dw) { - memcpy(data, entry, entry_dw * sizeof(u32)); - length -= entry_dw * sizeof(u32); + memcpy(data, entry, entry_dw * sizeof(__le32)); + length -= entry_dw * sizeof(__le32); data += entry_dw; } } while (entry_handle != CXL_DOE_TABLE_ACCESS_LAST_ENTRY); diff --git a/drivers/pci/doe.c b/drivers/pci/doe.c index 66d9ab288646..6f097932ccbf 100644 --- a/drivers/pci/doe.c +++ b/drivers/pci/doe.c @@ -128,7 +128,7 @@ static int pci_doe_send_req(struct pci_doe_mb *doe_mb, return -EIO; /* Length is 2 DW of header + length of payload in DW */ - length = 2 + task->request_pl_sz / sizeof(u32); + length = 2 + task->request_pl_sz / sizeof(__le32); if (length > PCI_DOE_MAX_LENGTH) return -EIO; if (length == PCI_DOE_MAX_LENGTH) @@ -141,9 +141,9 @@ static int pci_doe_send_req(struct pci_doe_mb *doe_mb, pci_write_config_dword(pdev, offset + PCI_DOE_WRITE, FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_2_LENGTH, length)); - for (i = 0; i < task->request_pl_sz / sizeof(u32); i++) + for (i = 0; i < task->request_pl_sz / sizeof(__le32); i++) pci_write_config_dword(pdev, offset + PCI_DOE_WRITE, - task->request_pl[i]); + le32_to_cpu(task->request_pl[i])); pci_doe_write_ctrl(doe_mb, PCI_DOE_CTRL_GO); @@ -195,11 +195,11 @@ static int pci_doe_recv_resp(struct pci_doe_mb *doe_mb, struct pci_doe_task *tas /* First 2 dwords have already been read */ length -= 2; - payload_length = min(length, task->response_pl_sz / sizeof(u32)); + payload_length = min(length, task->response_pl_sz / sizeof(__le32)); /* Read the rest of the response payload */ for (i = 0; i < payload_length; i++) { - pci_read_config_dword(pdev, offset + PCI_DOE_READ, - &task->response_pl[i]); + pci_read_config_dword(pdev, offset + PCI_DOE_READ, &val); + task->response_pl[i] = cpu_to_le32(val); /* Prior to the last ack, ensure Data Object Ready */ if (i == (payload_length - 1) && !pci_doe_data_obj_ready(doe_mb)) return -EIO; @@ -217,7 +217,7 @@ static int pci_doe_recv_resp(struct pci_doe_mb *doe_mb, struct pci_doe_task *tas if (FIELD_GET(PCI_DOE_STATUS_ERROR, val)) return -EIO; - return min(length, task->response_pl_sz / sizeof(u32)) * sizeof(u32); + return min(length, task->response_pl_sz / sizeof(__le32)) * sizeof(__le32); } static void signal_task_complete(struct pci_doe_task *task, int rv) @@ -317,14 +317,16 @@ static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid, { u32 request_pl = FIELD_PREP(PCI_DOE_DATA_OBJECT_DISC_REQ_3_INDEX, *index); + __le32 request_pl_le = cpu_to_le32(request_pl); + __le32 response_pl_le; u32 response_pl; DECLARE_COMPLETION_ONSTACK(c); struct pci_doe_task task = { .prot.vid = PCI_VENDOR_ID_PCI_SIG, .prot.type = PCI_DOE_PROTOCOL_DISCOVERY, - .request_pl = &request_pl, + .request_pl = &request_pl_le, .request_pl_sz = sizeof(request_pl), - .response_pl = &response_pl, + .response_pl = &response_pl_le, .response_pl_sz = sizeof(response_pl), .complete = pci_doe_task_complete, .private = &c, @@ -340,6 +342,7 @@ static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid, if (task.rv != sizeof(response_pl)) return -EIO; + response_pl = le32_to_cpu(response_pl_le); *vid = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_VID, response_pl); *protocol = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_PROTOCOL, response_pl); @@ -533,8 +536,8 @@ int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task) * DOE requests must be a whole number of DW and the response needs to * be big enough for at least 1 DW */ - if (task->request_pl_sz % sizeof(u32) || - task->response_pl_sz < sizeof(u32)) + if (task->request_pl_sz % sizeof(__le32) || + task->response_pl_sz < sizeof(__le32)) return -EINVAL; if (test_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags)) diff --git a/include/linux/pci-doe.h b/include/linux/pci-doe.h index ed9b4df792b8..43765eaf2342 100644 --- a/include/linux/pci-doe.h +++ b/include/linux/pci-doe.h @@ -34,6 +34,10 @@ struct pci_doe_mb; * @work: Used internally by the mailbox * @doe_mb: Used internally by the mailbox * + * Payloads are treated as opaque byte streams which are transmitted verbatim, + * without byte-swapping. If payloads contain little-endian register values, + * the caller is responsible for conversion with cpu_to_le32() / le32_to_cpu(). + * * The payload sizes and rv are specified in bytes with the following * restrictions concerning the protocol. * @@ -45,9 +49,9 @@ struct pci_doe_mb; */ struct pci_doe_task { struct pci_doe_protocol prot; - u32 *request_pl; + __le32 *request_pl; size_t request_pl_sz; - u32 *response_pl; + __le32 *response_pl; size_t response_pl_sz; int rv; void (*complete)(struct pci_doe_task *task); -- cgit v1.2.3 From aa01c67de5926fdb276793180564f172c55fb0d7 Mon Sep 17 00:00:00 2001 From: Caleb Sander Date: Mon, 20 Mar 2023 09:57:36 -0600 Subject: nvme-tcp: fix nvme_tcp_term_pdu to match spec The FEI field of C2HTermReq/H2CTermReq is 4 bytes but not 4-byte-aligned in the NVMe/TCP specification (it is located at offset 10 in the PDU). Split it into two 16-bit integers in struct nvme_tcp_term_pdu so no padding is inserted. There should also be 10 reserved bytes after. There are currently no users of this type. Fixes: fc221d05447aa6db ("nvme-tcp: Add protocol header") Reported-by: Geert Uytterhoeven Signed-off-by: Caleb Sander Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- include/linux/nvme-tcp.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/nvme-tcp.h b/include/linux/nvme-tcp.h index 75470159a194..57ebe1267f7f 100644 --- a/include/linux/nvme-tcp.h +++ b/include/linux/nvme-tcp.h @@ -115,8 +115,9 @@ struct nvme_tcp_icresp_pdu { struct nvme_tcp_term_pdu { struct nvme_tcp_hdr hdr; __le16 fes; - __le32 fei; - __u8 rsvd[8]; + __le16 feil; + __le16 feiu; + __u8 rsvd[10]; }; /** -- cgit v1.2.3 From 790930f44289c8209c57461b2db499fcc702e0b3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 17 Mar 2023 18:01:26 +0100 Subject: thermal: core: Introduce thermal_cooling_device_update() Introduce a core thermal API function, thermal_cooling_device_update(), for updating the max_state value for a cooling device and rearranging its statistics in sysfs after a possible change of its ->get_max_state() callback return value. That callback is now invoked only once, during cooling device registration, to populate the max_state field in the cooling device object, so if its return value changes, it needs to be invoked again and the new return value needs to be stored as max_state. Moreover, the statistics presented in sysfs need to be rearranged in general, because there may not be enough room in them to store data for all of the possible states (in the case when max_state grows). The new function takes care of that (and some other minor things related to it), but some extra locking and lockdep annotations are added in several places too to protect against crashes in the cases when the statistics are not present or when a stale max_state value might be used by sysfs attributes. Note that the actual user of the new function will be added separately. Link: https://lore.kernel.org/linux-pm/53ec1f06f61c984100868926f282647e57ecfb2d.camel@intel.com/ Signed-off-by: Rafael J. Wysocki Tested-by: Zhang Rui Reviewed-by: Zhang Rui --- drivers/thermal/thermal_core.c | 83 ++++++++++++++++++++++++++++++++++++++++- drivers/thermal/thermal_core.h | 2 + drivers/thermal/thermal_sysfs.c | 74 +++++++++++++++++++++++++++++++----- include/linux/thermal.h | 1 + 4 files changed, 150 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 8894342540f1..cfd4c1afeae7 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -613,6 +613,7 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz, struct thermal_instance *pos; struct thermal_zone_device *pos1; struct thermal_cooling_device *pos2; + bool upper_no_limit; int result; if (trip >= tz->num_trips || trip < 0) @@ -632,7 +633,13 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz, /* lower default 0, upper default max_state */ lower = lower == THERMAL_NO_LIMIT ? 0 : lower; - upper = upper == THERMAL_NO_LIMIT ? cdev->max_state : upper; + + if (upper == THERMAL_NO_LIMIT) { + upper = cdev->max_state; + upper_no_limit = true; + } else { + upper_no_limit = false; + } if (lower > upper || upper > cdev->max_state) return -EINVAL; @@ -644,6 +651,7 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz, dev->cdev = cdev; dev->trip = trip; dev->upper = upper; + dev->upper_no_limit = upper_no_limit; dev->lower = lower; dev->target = THERMAL_NO_TARGET; dev->weight = weight; @@ -1057,6 +1065,79 @@ static bool thermal_cooling_device_present(struct thermal_cooling_device *cdev) return false; } +/** + * thermal_cooling_device_update - Update a cooling device object + * @cdev: Target cooling device. + * + * Update @cdev to reflect a change of the underlying hardware or platform. + * + * Must be called when the maximum cooling state of @cdev becomes invalid and so + * its .get_max_state() callback needs to be run to produce the new maximum + * cooling state value. + */ +void thermal_cooling_device_update(struct thermal_cooling_device *cdev) +{ + struct thermal_instance *ti; + unsigned long state; + + if (IS_ERR_OR_NULL(cdev)) + return; + + /* + * Hold thermal_list_lock throughout the update to prevent the device + * from going away while being updated. + */ + mutex_lock(&thermal_list_lock); + + if (!thermal_cooling_device_present(cdev)) + goto unlock_list; + + /* + * Update under the cdev lock to prevent the state from being set beyond + * the new limit concurrently. + */ + mutex_lock(&cdev->lock); + + if (cdev->ops->get_max_state(cdev, &cdev->max_state)) + goto unlock; + + thermal_cooling_device_stats_reinit(cdev); + + list_for_each_entry(ti, &cdev->thermal_instances, cdev_node) { + if (ti->upper == cdev->max_state) + continue; + + if (ti->upper < cdev->max_state) { + if (ti->upper_no_limit) + ti->upper = cdev->max_state; + + continue; + } + + ti->upper = cdev->max_state; + if (ti->lower > ti->upper) + ti->lower = ti->upper; + + if (ti->target == THERMAL_NO_TARGET) + continue; + + if (ti->target > ti->upper) + ti->target = ti->upper; + } + + if (cdev->ops->get_cur_state(cdev, &state) || state > cdev->max_state) + goto unlock; + + thermal_cooling_device_stats_update(cdev, state); + +unlock: + mutex_unlock(&cdev->lock); + +unlock_list: + mutex_unlock(&thermal_list_lock); +} +EXPORT_SYMBOL_GPL(thermal_cooling_device_update); + static void __unbind(struct thermal_zone_device *tz, int mask, struct thermal_cooling_device *cdev) { diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index 7af54382e915..3d4a787c6b28 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -101,6 +101,7 @@ struct thermal_instance { struct list_head tz_node; /* node in tz->thermal_instances */ struct list_head cdev_node; /* node in cdev->thermal_instances */ unsigned int weight; /* The weight of the cooling device */ + bool upper_no_limit; }; #define to_thermal_zone(_dev) \ @@ -127,6 +128,7 @@ int thermal_zone_create_device_groups(struct thermal_zone_device *, int); void thermal_zone_destroy_device_groups(struct thermal_zone_device *); void thermal_cooling_device_setup_sysfs(struct thermal_cooling_device *); void thermal_cooling_device_destroy_sysfs(struct thermal_cooling_device *cdev); +void thermal_cooling_device_stats_reinit(struct thermal_cooling_device *cdev); /* used only at binding time */ ssize_t trip_point_show(struct device *, struct device_attribute *, char *); ssize_t weight_show(struct device *, struct device_attribute *, char *); diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c index cef860deaf91..a4aba7b8bb8b 100644 --- a/drivers/thermal/thermal_sysfs.c +++ b/drivers/thermal/thermal_sysfs.c @@ -685,6 +685,8 @@ void thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev, { struct cooling_dev_stats *stats = cdev->stats; + lockdep_assert_held(&cdev->lock); + if (!stats) return; @@ -706,13 +708,22 @@ static ssize_t total_trans_show(struct device *dev, struct device_attribute *attr, char *buf) { struct thermal_cooling_device *cdev = to_cooling_device(dev); - struct cooling_dev_stats *stats = cdev->stats; - int ret; + struct cooling_dev_stats *stats; + int ret = 0; + + mutex_lock(&cdev->lock); + + stats = cdev->stats; + if (!stats) + goto unlock; spin_lock(&stats->lock); ret = sprintf(buf, "%u\n", stats->total_trans); spin_unlock(&stats->lock); +unlock: + mutex_unlock(&cdev->lock); + return ret; } @@ -721,11 +732,18 @@ time_in_state_ms_show(struct device *dev, struct device_attribute *attr, char *buf) { struct thermal_cooling_device *cdev = to_cooling_device(dev); - struct cooling_dev_stats *stats = cdev->stats; + struct cooling_dev_stats *stats; ssize_t len = 0; int i; + mutex_lock(&cdev->lock); + + stats = cdev->stats; + if (!stats) + goto unlock; + spin_lock(&stats->lock); + update_time_in_state(stats); for (i = 0; i <= cdev->max_state; i++) { @@ -734,6 +752,9 @@ time_in_state_ms_show(struct device *dev, struct device_attribute *attr, } spin_unlock(&stats->lock); +unlock: + mutex_unlock(&cdev->lock); + return len; } @@ -742,8 +763,16 @@ reset_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct thermal_cooling_device *cdev = to_cooling_device(dev); - struct cooling_dev_stats *stats = cdev->stats; - int i, states = cdev->max_state + 1; + struct cooling_dev_stats *stats; + int i, states; + + mutex_lock(&cdev->lock); + + stats = cdev->stats; + if (!stats) + goto unlock; + + states = cdev->max_state + 1; spin_lock(&stats->lock); @@ -757,6 +786,9 @@ reset_store(struct device *dev, struct device_attribute *attr, const char *buf, spin_unlock(&stats->lock); +unlock: + mutex_unlock(&cdev->lock); + return count; } @@ -764,10 +796,18 @@ static ssize_t trans_table_show(struct device *dev, struct device_attribute *attr, char *buf) { struct thermal_cooling_device *cdev = to_cooling_device(dev); - struct cooling_dev_stats *stats = cdev->stats; + struct cooling_dev_stats *stats; ssize_t len = 0; int i, j; + mutex_lock(&cdev->lock); + + stats = cdev->stats; + if (!stats) { + len = -ENODATA; + goto unlock; + } + len += snprintf(buf + len, PAGE_SIZE - len, " From : To\n"); len += snprintf(buf + len, PAGE_SIZE - len, " : "); for (i = 0; i <= cdev->max_state; i++) { @@ -775,8 +815,10 @@ static ssize_t trans_table_show(struct device *dev, break; len += snprintf(buf + len, PAGE_SIZE - len, "state%2u ", i); } - if (len >= PAGE_SIZE) - return PAGE_SIZE; + if (len >= PAGE_SIZE) { + len = PAGE_SIZE; + goto unlock; + } len += snprintf(buf + len, PAGE_SIZE - len, "\n"); @@ -799,8 +841,12 @@ static ssize_t trans_table_show(struct device *dev, if (len >= PAGE_SIZE) { pr_warn_once("Thermal transition table exceeds PAGE_SIZE. Disabling\n"); - return -EFBIG; + len = -EFBIG; } + +unlock: + mutex_unlock(&cdev->lock); + return len; } @@ -830,6 +876,8 @@ static void cooling_device_stats_setup(struct thermal_cooling_device *cdev) unsigned long states = cdev->max_state + 1; int var; + lockdep_assert_held(&cdev->lock); + var = sizeof(*stats); var += sizeof(*stats->time_in_state) * states; var += sizeof(*stats->trans_table) * states * states; @@ -855,6 +903,8 @@ out: static void cooling_device_stats_destroy(struct thermal_cooling_device *cdev) { + lockdep_assert_held(&cdev->lock); + kfree(cdev->stats); cdev->stats = NULL; } @@ -879,6 +929,12 @@ void thermal_cooling_device_destroy_sysfs(struct thermal_cooling_device *cdev) cooling_device_stats_destroy(cdev); } +void thermal_cooling_device_stats_reinit(struct thermal_cooling_device *cdev) +{ + cooling_device_stats_destroy(cdev); + cooling_device_stats_setup(cdev); +} + /* these helper will be used only at the time of bindig */ ssize_t trip_point_show(struct device *dev, struct device_attribute *attr, char *buf) diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 2bb4bf33f4f3..13c6aaed18df 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -384,6 +384,7 @@ devm_thermal_of_cooling_device_register(struct device *dev, struct device_node *np, char *type, void *devdata, const struct thermal_cooling_device_ops *ops); +void thermal_cooling_device_update(struct thermal_cooling_device *); void thermal_cooling_device_unregister(struct thermal_cooling_device *); struct thermal_zone_device *thermal_zone_get_zone_by_name(const char *name); int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp); -- cgit v1.2.3 From d18a04157fc171fd48075e3dc96471bd3b87f0dd Mon Sep 17 00:00:00 2001 From: Douglas Raillard Date: Mon, 6 Mar 2023 12:27:43 +0000 Subject: rcu: Fix rcu_torture_read ftrace event Fix the rcutorturename field so that its size is correctly reported in the text format embedded in trace.dat files. As it stands, it is reported as being of size 1: field:char rcutorturename[8]; offset:8; size:1; signed:0; Signed-off-by: Douglas Raillard Reviewed-by: Mukesh Ojha Cc: stable@vger.kernel.org Fixes: 04ae87a52074e ("ftrace: Rework event_create_dir()") Reviewed-by: Steven Rostedt (Google) [ boqun: Add "Cc" and "Fixes" tags per Steven ] Signed-off-by: Boqun Feng Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 90b2fb0292cb..012fa0d171b2 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -768,7 +768,7 @@ TRACE_EVENT_RCU(rcu_torture_read, TP_ARGS(rcutorturename, rhp, secs, c_old, c), TP_STRUCT__entry( - __field(char, rcutorturename[RCUTORTURENAME_LEN]) + __array(char, rcutorturename, RCUTORTURENAME_LEN) __field(struct rcu_head *, rhp) __field(unsigned long, secs) __field(unsigned long, c_old) -- cgit v1.2.3 From 294d749b5df5a22d17989833fb1a0a2cd1dfd243 Mon Sep 17 00:00:00 2001 From: Kiran K Date: Tue, 28 Feb 2023 16:31:54 +0530 Subject: Bluetooth: btintel: Iterate only bluetooth device ACPI entries Current flow interates over entire ACPI table entries looking for Bluetooth Per Platform Antenna Gain(PPAG) entry. This patch iterates over ACPI entries relvant to Bluetooth device only. Fixes: c585a92b2f9c ("Bluetooth: btintel: Set Per Platform Antenna Gain(PPAG)") Signed-off-by: Kiran K Signed-off-by: Luiz Augusto von Dentz --- drivers/bluetooth/btintel.c | 44 ++++++++++++++++++++++++---------------- drivers/bluetooth/btintel.h | 7 ------- include/net/bluetooth/hci_core.h | 1 + 3 files changed, 27 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index bede8b005594..e8d4b59e89c5 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -26,7 +26,14 @@ #define ECDSA_HEADER_LEN 320 #define BTINTEL_PPAG_NAME "PPAG" -#define BTINTEL_PPAG_PREFIX "\\_SB_.PCI0.XHCI.RHUB" + +/* structure to store the PPAG data read from ACPI table */ +struct btintel_ppag { + u32 domain; + u32 mode; + acpi_status status; + struct hci_dev *hdev; +}; #define CMD_WRITE_BOOT_PARAMS 0xfc0e struct cmd_write_boot_params { @@ -1295,17 +1302,16 @@ static acpi_status btintel_ppag_callback(acpi_handle handle, u32 lvl, void *data status = acpi_get_name(handle, ACPI_FULL_PATHNAME, &string); if (ACPI_FAILURE(status)) { - bt_dev_warn(hdev, "ACPI Failure: %s", acpi_format_exception(status)); + bt_dev_warn(hdev, "PPAG-BT: ACPI Failure: %s", acpi_format_exception(status)); return status; } - if (strncmp(BTINTEL_PPAG_PREFIX, string.pointer, - strlen(BTINTEL_PPAG_PREFIX))) { + len = strlen(string.pointer); + if (len < strlen(BTINTEL_PPAG_NAME)) { kfree(string.pointer); return AE_OK; } - len = strlen(string.pointer); if (strncmp((char *)string.pointer + len - 4, BTINTEL_PPAG_NAME, 4)) { kfree(string.pointer); return AE_OK; @@ -1314,7 +1320,8 @@ static acpi_status btintel_ppag_callback(acpi_handle handle, u32 lvl, void *data status = acpi_evaluate_object(handle, NULL, NULL, &buffer); if (ACPI_FAILURE(status)) { - bt_dev_warn(hdev, "ACPI Failure: %s", acpi_format_exception(status)); + ppag->status = status; + bt_dev_warn(hdev, "PPAG-BT: ACPI Failure: %s", acpi_format_exception(status)); return status; } @@ -1323,8 +1330,9 @@ static acpi_status btintel_ppag_callback(acpi_handle handle, u32 lvl, void *data if (p->type != ACPI_TYPE_PACKAGE || p->package.count != 2) { kfree(buffer.pointer); - bt_dev_warn(hdev, "Invalid object type: %d or package count: %d", + bt_dev_warn(hdev, "PPAG-BT: Invalid object type: %d or package count: %d", p->type, p->package.count); + ppag->status = AE_ERROR; return AE_ERROR; } @@ -1335,6 +1343,7 @@ static acpi_status btintel_ppag_callback(acpi_handle handle, u32 lvl, void *data ppag->domain = (u32)p->package.elements[0].integer.value; ppag->mode = (u32)p->package.elements[1].integer.value; + ppag->status = AE_OK; kfree(buffer.pointer); return AE_CTRL_TERMINATE; } @@ -2314,12 +2323,11 @@ error: static void btintel_set_ppag(struct hci_dev *hdev, struct intel_version_tlv *ver) { - acpi_status status; struct btintel_ppag ppag; struct sk_buff *skb; struct btintel_loc_aware_reg ppag_cmd; - /* PPAG is not supported if CRF is HrP2, Jfp2, JfP1 */ + /* PPAG is not supported if CRF is HrP2, Jfp2, JfP1 */ switch (ver->cnvr_top & 0xFFF) { case 0x504: /* Hrp2 */ case 0x202: /* Jfp2 */ @@ -2330,26 +2338,26 @@ static void btintel_set_ppag(struct hci_dev *hdev, struct intel_version_tlv *ver memset(&ppag, 0, sizeof(ppag)); ppag.hdev = hdev; - status = acpi_walk_namespace(ACPI_TYPE_ANY, ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, NULL, - btintel_ppag_callback, &ppag, NULL); + ppag.status = AE_NOT_FOUND; + acpi_walk_namespace(ACPI_TYPE_PACKAGE, ACPI_HANDLE(GET_HCIDEV_DEV(hdev)), + 1, NULL, btintel_ppag_callback, &ppag, NULL); - if (ACPI_FAILURE(status)) { - /* Do not log warning message if ACPI entry is not found */ - if (status == AE_NOT_FOUND) + if (ACPI_FAILURE(ppag.status)) { + if (ppag.status == AE_NOT_FOUND) { + bt_dev_dbg(hdev, "PPAG-BT: ACPI entry not found"); return; - bt_dev_warn(hdev, "PPAG: ACPI Failure: %s", acpi_format_exception(status)); + } return; } if (ppag.domain != 0x12) { - bt_dev_warn(hdev, "PPAG-BT Domain disabled"); + bt_dev_warn(hdev, "PPAG-BT: domain is not bluetooth"); return; } /* PPAG mode, BIT0 = 0 Disabled, BIT0 = 1 Enabled */ if (!(ppag.mode & BIT(0))) { - bt_dev_dbg(hdev, "PPAG disabled"); + bt_dev_dbg(hdev, "PPAG-BT: disabled"); return; } diff --git a/drivers/bluetooth/btintel.h b/drivers/bluetooth/btintel.h index 8e7da877efae..8fdb65b66315 100644 --- a/drivers/bluetooth/btintel.h +++ b/drivers/bluetooth/btintel.h @@ -137,13 +137,6 @@ struct intel_offload_use_cases { __u8 preset[8]; } __packed; -/* structure to store the PPAG data read from ACPI table */ -struct btintel_ppag { - u32 domain; - u32 mode; - struct hci_dev *hdev; -}; - struct btintel_loc_aware_reg { __le32 mcc; __le32 sel; diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 7254edfba4c9..6ed9b4d546a7 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1613,6 +1613,7 @@ void hci_conn_add_sysfs(struct hci_conn *conn); void hci_conn_del_sysfs(struct hci_conn *conn); #define SET_HCIDEV_DEV(hdev, pdev) ((hdev)->dev.parent = (pdev)) +#define GET_HCIDEV_DEV(hdev) ((hdev)->dev.parent) /* ----- LMP capabilities ----- */ #define lmp_encrypt_capable(dev) ((dev)->features[0][0] & LMP_ENCRYPT) -- cgit v1.2.3 From d0072ca529674c36421023ffe90837a7de9387f3 Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Sat, 11 Mar 2023 08:18:00 +0900 Subject: mm: mmap: remove newline at the end of the trace We already have newline in TP_printk so remove the redundant newline character at the end of the mmap trace. <...>-345 [006] ..... 95.589290: exit_mmap: mt_mod ... <...>-345 [006] ..... 95.589413: vm_unmapped_area: addr=... <...>-345 [006] ..... 95.589571: vm_unmapped_area: addr=... <...>-345 [006] ..... 95.589606: vm_unmapped_area: addr=... to <...>-336 [006] ..... 44.762506: exit_mmap: mt_mod ... <...>-336 [006] ..... 44.762654: vm_unmapped_area: addr=... <...>-336 [006] ..... 44.762794: vm_unmapped_area: addr=... <...>-336 [006] ..... 44.762835: vm_unmapped_area: addr=... Link: https://lkml.kernel.org/r/ZAu6qDsNPmk82UjV@minwoo-desktop FIxes: df529cabb7a25 ("mm: mmap: add trace point of vm_unmapped_area") Signed-off-by: Minwoo Im Acked-by: Steven Rostedt (Google) Reviewed-by: Mukesh Ojha Reviewed-by: David Hildenbrand Signed-off-by: Andrew Morton --- include/trace/events/mmap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/mmap.h b/include/trace/events/mmap.h index 216de5f03621..f8d61485de16 100644 --- a/include/trace/events/mmap.h +++ b/include/trace/events/mmap.h @@ -35,7 +35,7 @@ TRACE_EVENT(vm_unmapped_area, __entry->align_offset = info->align_offset; ), - TP_printk("addr=0x%lx err=%ld total_vm=0x%lx flags=0x%lx len=0x%lx lo=0x%lx hi=0x%lx mask=0x%lx ofs=0x%lx\n", + TP_printk("addr=0x%lx err=%ld total_vm=0x%lx flags=0x%lx len=0x%lx lo=0x%lx hi=0x%lx mask=0x%lx ofs=0x%lx", IS_ERR_VALUE(__entry->addr) ? 0 : __entry->addr, IS_ERR_VALUE(__entry->addr) ? __entry->addr : 0, __entry->total_vm, __entry->flags, __entry->length, @@ -110,7 +110,7 @@ TRACE_EVENT(exit_mmap, __entry->mt = &mm->mm_mt; ), - TP_printk("mt_mod %p, DESTROY\n", + TP_printk("mt_mod %p, DESTROY", __entry->mt ) ); -- cgit v1.2.3 From a90ac762d345890b40d88a1385a34a2449c2d75e Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 24 Mar 2023 09:23:42 +0000 Subject: net: sfp: make sfp_bus_find_fwnode() take a const fwnode sfp_bus_find_fwnode() does not write to the fwnode, so let's make it const. Signed-off-by: Russell King (Oracle) Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/phy/sfp-bus.c | 2 +- include/linux/sfp.h | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index daac293e8ede..8284f29b3644 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -593,7 +593,7 @@ static void sfp_upstream_clear(struct sfp_bus *bus) * - %-ENOMEM if we failed to allocate the bus. * - an error from the upstream's connect_phy() method. */ -struct sfp_bus *sfp_bus_find_fwnode(struct fwnode_handle *fwnode) +struct sfp_bus *sfp_bus_find_fwnode(const struct fwnode_handle *fwnode) { struct fwnode_reference_args ref; struct sfp_bus *bus; diff --git a/include/linux/sfp.h b/include/linux/sfp.h index 52b98f9666a2..ef06a195b3c2 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -557,7 +557,7 @@ int sfp_get_module_eeprom_by_page(struct sfp_bus *bus, void sfp_upstream_start(struct sfp_bus *bus); void sfp_upstream_stop(struct sfp_bus *bus); void sfp_bus_put(struct sfp_bus *bus); -struct sfp_bus *sfp_bus_find_fwnode(struct fwnode_handle *fwnode); +struct sfp_bus *sfp_bus_find_fwnode(const struct fwnode_handle *fwnode); int sfp_bus_add_upstream(struct sfp_bus *bus, void *upstream, const struct sfp_upstream_ops *ops); void sfp_bus_del_upstream(struct sfp_bus *bus); @@ -619,7 +619,8 @@ static inline void sfp_bus_put(struct sfp_bus *bus) { } -static inline struct sfp_bus *sfp_bus_find_fwnode(struct fwnode_handle *fwnode) +static inline struct sfp_bus * +sfp_bus_find_fwnode(const struct fwnode_handle *fwnode) { return NULL; } -- cgit v1.2.3 From 4a0faa02d419a6728abef0f1d8a32d8c35ef95e6 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 24 Mar 2023 09:23:53 +0000 Subject: net: phy: constify fwnode_get_phy_node() fwnode argument fwnode_get_phy_node() does not motify the fwnode structure, so make the argument const, Signed-off-by: Russell King (Oracle) Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 2 +- include/linux/phy.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 1785f1cead97..1de3e339b31a 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -3057,7 +3057,7 @@ EXPORT_SYMBOL_GPL(device_phy_find_device); * and "phy-device" are not supported in ACPI. DT supports all the three * named references to the phy node. */ -struct fwnode_handle *fwnode_get_phy_node(struct fwnode_handle *fwnode) +struct fwnode_handle *fwnode_get_phy_node(const struct fwnode_handle *fwnode) { struct fwnode_handle *phy_node; diff --git a/include/linux/phy.h b/include/linux/phy.h index 36bf0bbc8efa..db7c0bd67559 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1547,7 +1547,7 @@ int fwnode_get_phy_id(struct fwnode_handle *fwnode, u32 *phy_id); struct mdio_device *fwnode_mdio_find_device(struct fwnode_handle *fwnode); struct phy_device *fwnode_phy_find_device(struct fwnode_handle *phy_fwnode); struct phy_device *device_phy_find_device(struct device *dev); -struct fwnode_handle *fwnode_get_phy_node(struct fwnode_handle *fwnode); +struct fwnode_handle *fwnode_get_phy_node(const struct fwnode_handle *fwnode); struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45); int phy_device_register(struct phy_device *phy); void phy_device_free(struct phy_device *phydev); -- cgit v1.2.3 From d583fbd7066a2dea43050521a95d9770f7d7593e Mon Sep 17 00:00:00 2001 From: Dmytro Maluka Date: Wed, 22 Mar 2023 21:43:43 +0100 Subject: KVM: irqfd: Make resampler_list an RCU list It is useful to be able to do read-only traversal of the list of all the registered irqfd resamplers without locking the resampler_lock mutex. In particular, we are going to traverse it to search for a resampler registered for the given irq of an irqchip, and that will be done with an irqchip spinlock (ioapic->lock) held, so it is undesirable to lock a mutex in this context. So turn this list into an RCU list. For protecting the read side, reuse kvm->irq_srcu which is already used for protecting a number of irq related things (kvm->irq_routing, irqfd->resampler->list, kvm->irq_ack_notifier_list, kvm->arch.mask_notifier_list). Signed-off-by: Dmytro Maluka Message-Id: <20230322204344.50138-2-dmy@semihalf.com> Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + include/linux/kvm_irqfd.h | 2 +- virt/kvm/eventfd.c | 8 ++++++-- 3 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8ada23756b0e..9f508c8e66e1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -755,6 +755,7 @@ struct kvm { struct { spinlock_t lock; struct list_head items; + /* resampler_list update side is protected by resampler_lock. */ struct list_head resampler_list; struct mutex resampler_lock; } irqfds; diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h index dac047abdba7..8ad43692e3bb 100644 --- a/include/linux/kvm_irqfd.h +++ b/include/linux/kvm_irqfd.h @@ -31,7 +31,7 @@ struct kvm_kernel_irqfd_resampler { /* * Entry in list of kvm->irqfd.resampler_list. Use for sharing * resamplers among irqfds on the same gsi. - * Accessed and modified under kvm->irqfds.resampler_lock + * RCU list modified under kvm->irqfds.resampler_lock */ struct list_head link; }; diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 2a3ed401ce46..61aea70dd888 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -96,8 +96,12 @@ irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd) synchronize_srcu(&kvm->irq_srcu); if (list_empty(&resampler->list)) { - list_del(&resampler->link); + list_del_rcu(&resampler->link); kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier); + /* + * synchronize_srcu(&kvm->irq_srcu) already called + * in kvm_unregister_irq_ack_notifier(). + */ kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, resampler->notifier.gsi, 0, false); kfree(resampler); @@ -369,7 +373,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) resampler->notifier.irq_acked = irqfd_resampler_ack; INIT_LIST_HEAD(&resampler->link); - list_add(&resampler->link, &kvm->irqfds.resampler_list); + list_add_rcu(&resampler->link, &kvm->irqfds.resampler_list); kvm_register_irq_ack_notifier(kvm, &resampler->notifier); irqfd->resampler = resampler; -- cgit v1.2.3 From fef8f2b90edbd7089a4278021314f11f056b0cbb Mon Sep 17 00:00:00 2001 From: Dmytro Maluka Date: Wed, 22 Mar 2023 21:43:44 +0100 Subject: KVM: x86/ioapic: Resample the pending state of an IRQ when unmasking KVM irqfd based emulation of level-triggered interrupts doesn't work quite correctly in some cases, particularly in the case of interrupts that are handled in a Linux guest as oneshot interrupts (IRQF_ONESHOT). Such an interrupt is acked to the device in its threaded irq handler, i.e. later than it is acked to the interrupt controller (EOI at the end of hardirq), not earlier. Linux keeps such interrupt masked until its threaded handler finishes, to prevent the EOI from re-asserting an unacknowledged interrupt. However, with KVM + vfio (or whatever is listening on the resamplefd) we always notify resamplefd at the EOI, so vfio prematurely unmasks the host physical IRQ, thus a new physical interrupt is fired in the host. This extra interrupt in the host is not a problem per se. The problem is that it is unconditionally queued for injection into the guest, so the guest sees an extra bogus interrupt. [*] There are observed at least 2 user-visible issues caused by those extra erroneous interrupts for a oneshot irq in the guest: 1. System suspend aborted due to a pending wakeup interrupt from ChromeOS EC (drivers/platform/chrome/cros_ec.c). 2. Annoying "invalid report id data" errors from ELAN0000 touchpad (drivers/input/mouse/elan_i2c_core.c), flooding the guest dmesg every time the touchpad is touched. The core issue here is that by the time when the guest unmasks the IRQ, the physical IRQ line is no longer asserted (since the guest has acked the interrupt to the device in the meantime), yet we unconditionally inject the interrupt queued into the guest by the previous resampling. So to fix the issue, we need a way to detect that the IRQ is no longer pending, and cancel the queued interrupt in this case. With IOAPIC we are not able to probe the physical IRQ line state directly (at least not if the underlying physical interrupt controller is an IOAPIC too), so in this patch we use irqfd resampler for that. Namely, instead of injecting the queued interrupt, we just notify the resampler that this interrupt is done. If the IRQ line is actually already deasserted, we are done. If it is still asserted, a new interrupt will be shortly triggered through irqfd and injected into the guest. In the case if there is no irqfd resampler registered for this IRQ, we cannot fix the issue, so we keep the existing behavior: immediately unconditionally inject the queued interrupt. This patch fixes the issue for x86 IOAPIC only. In the long run, we can fix it for other irqchips and other architectures too, possibly taking advantage of reading the physical state of the IRQ line, which is possible with some other irqchips (e.g. with arm64 GIC, maybe even with the legacy x86 PIC). [*] In this description we assume that the interrupt is a physical host interrupt forwarded to the guest e.g. by vfio. Potentially the same issue may occur also with a purely virtual interrupt from an emulated device, e.g. if the guest handles this interrupt, again, as a oneshot interrupt. Signed-off-by: Dmytro Maluka Link: https://lore.kernel.org/kvm/31420943-8c5f-125c-a5ee-d2fde2700083@semihalf.com/ Link: https://lore.kernel.org/lkml/87o7wrug0w.wl-maz@kernel.org/ Message-Id: <20230322204344.50138-3-dmy@semihalf.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/ioapic.c | 36 +++++++++++++++++++++++++++++++++--- include/linux/kvm_host.h | 10 ++++++++++ virt/kvm/eventfd.c | 41 +++++++++++++++++++++++++++++++++++------ 3 files changed, 78 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 042dee556125..995eb5054360 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -368,9 +368,39 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) mask_after = e->fields.mask; if (mask_before != mask_after) kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after); - if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG - && ioapic->irr & (1 << index)) - ioapic_service(ioapic, index, false); + if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG && + ioapic->irr & (1 << index) && !e->fields.mask && !e->fields.remote_irr) { + /* + * Pending status in irr may be outdated: the IRQ line may have + * already been deasserted by a device while the IRQ was masked. + * This occurs, for instance, if the interrupt is handled in a + * Linux guest as a oneshot interrupt (IRQF_ONESHOT). In this + * case the guest acknowledges the interrupt to the device in + * its threaded irq handler, i.e. after the EOI but before + * unmasking, so at the time of unmasking the IRQ line is + * already down but our pending irr bit is still set. In such + * cases, injecting this pending interrupt to the guest is + * buggy: the guest will receive an extra unwanted interrupt. + * + * So we need to check here if the IRQ is actually still pending. + * As we are generally not able to probe the IRQ line status + * directly, we do it through irqfd resampler. Namely, we clear + * the pending status and notify the resampler that this interrupt + * is done, without actually injecting it into the guest. If the + * IRQ line is actually already deasserted, we are done. If it is + * still asserted, a new interrupt will be shortly triggered + * through irqfd and injected into the guest. + * + * If, however, it's not possible to resample (no irqfd resampler + * registered for this irq), then unconditionally inject this + * pending interrupt into the guest, so the guest will not miss + * an interrupt, although may get an extra unwanted interrupt. + */ + if (kvm_notify_irqfd_resampler(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index)) + ioapic->irr &= ~(1 << index); + else + ioapic_service(ioapic, index, false); + } if (e->fields.delivery_mode == APIC_DM_FIXED) { struct kvm_lapic_irq irq; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9f508c8e66e1..a9adf75344be 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1987,6 +1987,9 @@ int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args); #ifdef CONFIG_HAVE_KVM_IRQFD int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args); void kvm_irqfd_release(struct kvm *kvm); +bool kvm_notify_irqfd_resampler(struct kvm *kvm, + unsigned int irqchip, + unsigned int pin); void kvm_irq_routing_update(struct kvm *); #else static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) @@ -1995,6 +1998,13 @@ static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) } static inline void kvm_irqfd_release(struct kvm *kvm) {} + +static inline bool kvm_notify_irqfd_resampler(struct kvm *kvm, + unsigned int irqchip, + unsigned int pin) +{ + return false; +} #endif #else diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 61aea70dd888..b0af834ffa95 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -55,6 +55,15 @@ irqfd_inject(struct work_struct *work) irqfd->gsi, 1, false); } +static void irqfd_resampler_notify(struct kvm_kernel_irqfd_resampler *resampler) +{ + struct kvm_kernel_irqfd *irqfd; + + list_for_each_entry_srcu(irqfd, &resampler->list, resampler_link, + srcu_read_lock_held(&resampler->kvm->irq_srcu)) + eventfd_signal(irqfd->resamplefd, 1); +} + /* * Since resampler irqfds share an IRQ source ID, we de-assert once * then notify all of the resampler irqfds using this GSI. We can't @@ -65,7 +74,6 @@ irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) { struct kvm_kernel_irqfd_resampler *resampler; struct kvm *kvm; - struct kvm_kernel_irqfd *irqfd; int idx; resampler = container_of(kian, @@ -76,11 +84,7 @@ irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) resampler->notifier.gsi, 0, false); idx = srcu_read_lock(&kvm->irq_srcu); - - list_for_each_entry_srcu(irqfd, &resampler->list, resampler_link, - srcu_read_lock_held(&kvm->irq_srcu)) - eventfd_signal(irqfd->resamplefd, 1); - + irqfd_resampler_notify(resampler); srcu_read_unlock(&kvm->irq_srcu, idx); } @@ -648,6 +652,31 @@ void kvm_irq_routing_update(struct kvm *kvm) spin_unlock_irq(&kvm->irqfds.lock); } +bool kvm_notify_irqfd_resampler(struct kvm *kvm, + unsigned int irqchip, + unsigned int pin) +{ + struct kvm_kernel_irqfd_resampler *resampler; + int gsi, idx; + + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); + if (gsi != -1) { + list_for_each_entry_srcu(resampler, + &kvm->irqfds.resampler_list, link, + srcu_read_lock_held(&kvm->irq_srcu)) { + if (resampler->notifier.gsi == gsi) { + irqfd_resampler_notify(resampler); + srcu_read_unlock(&kvm->irq_srcu, idx); + return true; + } + } + } + srcu_read_unlock(&kvm->irq_srcu, idx); + + return false; +} + /* * create a host-wide workqueue for issuing deferred shutdown requests * aggregated from all vm* instances. We need our own isolated -- cgit v1.2.3 From baad10973fdb442912af676de3348e80bd8fe602 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Thu, 30 Mar 2023 17:35:13 +0200 Subject: Revert "drm/scheduler: track GPU active time per entity" This reverts commit df622729ddbf as it introduces a use-after-free, which isn't easy to fix without going back to the design drawing board. Reported-by: Danilo Krummrich Signed-off-by: Lucas Stach --- drivers/gpu/drm/scheduler/sched_main.c | 6 ------ include/drm/gpu_scheduler.h | 7 ------- 2 files changed, 13 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 4e6ad6e122bc..0e4378420271 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -906,12 +906,6 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched) spin_unlock(&sched->job_list_lock); - if (job) { - job->entity->elapsed_ns += ktime_to_ns( - ktime_sub(job->s_fence->finished.timestamp, - job->s_fence->scheduled.timestamp)); - } - return job; } diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 9db9e5e504ee..9935d1e2ff69 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -228,13 +228,6 @@ struct drm_sched_entity { */ struct rb_node rb_tree_node; - /** - * @elapsed_ns: - * - * Records the amount of time where jobs from this entity were active - * on the GPU. - */ - uint64_t elapsed_ns; }; /** -- cgit v1.2.3 From 16812c96550c30a8d5743167ef4e462d6fbe7472 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 29 Mar 2023 21:47:21 +0800 Subject: iommu/vt-d: Fix an IOMMU perfmon warning when CPU hotplug A warning can be triggered when hotplug CPU 0. $ echo 0 > /sys/devices/system/cpu/cpu0/online ------------[ cut here ]------------ Voluntary context switch within RCU read-side critical section! WARNING: CPU: 0 PID: 19 at kernel/rcu/tree_plugin.h:318 rcu_note_context_switch+0x4f4/0x580 RIP: 0010:rcu_note_context_switch+0x4f4/0x580 Call Trace: ? perf_event_update_userpage+0x104/0x150 __schedule+0x8d/0x960 ? perf_event_set_state.part.82+0x11/0x50 schedule+0x44/0xb0 schedule_timeout+0x226/0x310 ? __perf_event_disable+0x64/0x1a0 ? _raw_spin_unlock+0x14/0x30 wait_for_completion+0x94/0x130 __wait_rcu_gp+0x108/0x130 synchronize_rcu+0x67/0x70 ? invoke_rcu_core+0xb0/0xb0 ? __bpf_trace_rcu_stall_warning+0x10/0x10 perf_pmu_migrate_context+0x121/0x370 iommu_pmu_cpu_offline+0x6a/0xa0 ? iommu_pmu_del+0x1e0/0x1e0 cpuhp_invoke_callback+0x129/0x510 cpuhp_thread_fun+0x94/0x150 smpboot_thread_fn+0x183/0x220 ? sort_range+0x20/0x20 kthread+0xe6/0x110 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x1f/0x30 ---[ end trace 0000000000000000 ]--- The synchronize_rcu() will be invoked in the perf_pmu_migrate_context(), when migrating a PMU to a new CPU. However, the current for_each_iommu() is within RCU read-side critical section. Two methods were considered to fix the issue. - Use the dmar_global_lock to replace the RCU read lock when going through the drhd list. But it triggers a lockdep warning. - Use the cpuhp_setup_state_multi() to set up a dedicated state for each IOMMU PMU. The lock can be avoided. The latter method is implemented in this patch. Since each IOMMU PMU has a dedicated state, add cpuhp_node and cpu in struct iommu_pmu to track the state. The state can be dynamically allocated now. Remove the CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE. Fixes: 46284c6ceb5e ("iommu/vt-d: Support cpumask for IOMMU perfmon") Reported-by: Ammy Yi Signed-off-by: Kan Liang Link: https://lore.kernel.org/r/20230328182028.1366416-1-kan.liang@linux.intel.com Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20230329134721.469447-4-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.h | 2 ++ drivers/iommu/intel/perfmon.c | 68 ++++++++++++++++++++++++++++--------------- include/linux/cpuhotplug.h | 1 - 3 files changed, 46 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index d6df3b865812..694ab9b7d3e9 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -641,6 +641,8 @@ struct iommu_pmu { DECLARE_BITMAP(used_mask, IOMMU_PMU_IDX_MAX); struct perf_event *event_list[IOMMU_PMU_IDX_MAX]; unsigned char irq_name[16]; + struct hlist_node cpuhp_node; + int cpu; }; #define IOMMU_IRQ_ID_OFFSET_PRQ (DMAR_UNITS_SUPPORTED) diff --git a/drivers/iommu/intel/perfmon.c b/drivers/iommu/intel/perfmon.c index e17d9743a0d8..cf43e798eca4 100644 --- a/drivers/iommu/intel/perfmon.c +++ b/drivers/iommu/intel/perfmon.c @@ -773,19 +773,34 @@ static void iommu_pmu_unset_interrupt(struct intel_iommu *iommu) iommu->perf_irq = 0; } -static int iommu_pmu_cpu_online(unsigned int cpu) +static int iommu_pmu_cpu_online(unsigned int cpu, struct hlist_node *node) { + struct iommu_pmu *iommu_pmu = hlist_entry_safe(node, typeof(*iommu_pmu), cpuhp_node); + if (cpumask_empty(&iommu_pmu_cpu_mask)) cpumask_set_cpu(cpu, &iommu_pmu_cpu_mask); + if (cpumask_test_cpu(cpu, &iommu_pmu_cpu_mask)) + iommu_pmu->cpu = cpu; + return 0; } -static int iommu_pmu_cpu_offline(unsigned int cpu) +static int iommu_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node) { - struct dmar_drhd_unit *drhd; - struct intel_iommu *iommu; - int target; + struct iommu_pmu *iommu_pmu = hlist_entry_safe(node, typeof(*iommu_pmu), cpuhp_node); + int target = cpumask_first(&iommu_pmu_cpu_mask); + + /* + * The iommu_pmu_cpu_mask has been updated when offline the CPU + * for the first iommu_pmu. Migrate the other iommu_pmu to the + * new target. + */ + if (target < nr_cpu_ids && target != iommu_pmu->cpu) { + perf_pmu_migrate_context(&iommu_pmu->pmu, cpu, target); + iommu_pmu->cpu = target; + return 0; + } if (!cpumask_test_and_clear_cpu(cpu, &iommu_pmu_cpu_mask)) return 0; @@ -795,45 +810,50 @@ static int iommu_pmu_cpu_offline(unsigned int cpu) if (target < nr_cpu_ids) cpumask_set_cpu(target, &iommu_pmu_cpu_mask); else - target = -1; + return 0; - rcu_read_lock(); - - for_each_iommu(iommu, drhd) { - if (!iommu->pmu) - continue; - perf_pmu_migrate_context(&iommu->pmu->pmu, cpu, target); - } - rcu_read_unlock(); + perf_pmu_migrate_context(&iommu_pmu->pmu, cpu, target); + iommu_pmu->cpu = target; return 0; } static int nr_iommu_pmu; +static enum cpuhp_state iommu_cpuhp_slot; static int iommu_pmu_cpuhp_setup(struct iommu_pmu *iommu_pmu) { int ret; - if (nr_iommu_pmu++) - return 0; + if (!nr_iommu_pmu) { + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, + "driver/iommu/intel/perfmon:online", + iommu_pmu_cpu_online, + iommu_pmu_cpu_offline); + if (ret < 0) + return ret; + iommu_cpuhp_slot = ret; + } - ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE, - "driver/iommu/intel/perfmon:online", - iommu_pmu_cpu_online, - iommu_pmu_cpu_offline); - if (ret) - nr_iommu_pmu = 0; + ret = cpuhp_state_add_instance(iommu_cpuhp_slot, &iommu_pmu->cpuhp_node); + if (ret) { + if (!nr_iommu_pmu) + cpuhp_remove_multi_state(iommu_cpuhp_slot); + return ret; + } + nr_iommu_pmu++; - return ret; + return 0; } static void iommu_pmu_cpuhp_free(struct iommu_pmu *iommu_pmu) { + cpuhp_state_remove_instance(iommu_cpuhp_slot, &iommu_pmu->cpuhp_node); + if (--nr_iommu_pmu) return; - cpuhp_remove_state(CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE); + cpuhp_remove_multi_state(iommu_cpuhp_slot); } void iommu_pmu_register(struct intel_iommu *iommu) diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index c6fab004104a..5b2f8147d1ae 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -218,7 +218,6 @@ enum cpuhp_state { CPUHP_AP_PERF_X86_CQM_ONLINE, CPUHP_AP_PERF_X86_CSTATE_ONLINE, CPUHP_AP_PERF_X86_IDXD_ONLINE, - CPUHP_AP_PERF_X86_IOMMU_PERF_ONLINE, CPUHP_AP_PERF_S390_CF_ONLINE, CPUHP_AP_PERF_S390_SF_ONLINE, CPUHP_AP_PERF_ARM_CCI_ONLINE, -- cgit v1.2.3 From 653a180957a85c3fc30320cc7e84f5dc913a64f8 Mon Sep 17 00:00:00 2001 From: Michael Sit Wei Hong Date: Thu, 30 Mar 2023 17:14:02 +0800 Subject: net: phylink: add phylink_expects_phy() method Provide phylink_expects_phy() to allow MAC drivers to check if it is expecting a PHY to attach to. Since fixed-linked setups do not need to attach to a PHY. Provides a boolean value as to if the MAC should expect a PHY. Returns true if a PHY is expected. Reviewed-by: Russell King (Oracle) Signed-off-by: Michael Sit Wei Hong Signed-off-by: David S. Miller --- drivers/net/phy/phylink.c | 19 +++++++++++++++++++ include/linux/phylink.h | 1 + 2 files changed, 20 insertions(+) (limited to 'include') diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 1a2f074685fa..30c166b33468 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1586,6 +1586,25 @@ void phylink_destroy(struct phylink *pl) } EXPORT_SYMBOL_GPL(phylink_destroy); +/** + * phylink_expects_phy() - Determine if phylink expects a phy to be attached + * @pl: a pointer to a &struct phylink returned from phylink_create() + * + * When using fixed-link mode, or in-band mode with 1000base-X or 2500base-X, + * no PHY is needed. + * + * Returns true if phylink will be expecting a PHY. + */ +bool phylink_expects_phy(struct phylink *pl) +{ + if (pl->cfg_link_an_mode == MLO_AN_FIXED || + (pl->cfg_link_an_mode == MLO_AN_INBAND && + phy_interface_mode_is_8023z(pl->link_config.interface))) + return false; + return true; +} +EXPORT_SYMBOL_GPL(phylink_expects_phy); + static void phylink_phy_change(struct phy_device *phydev, bool up) { struct phylink *pl = phydev->phylink; diff --git a/include/linux/phylink.h b/include/linux/phylink.h index c492c26202b5..637698ed5cb6 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -574,6 +574,7 @@ struct phylink *phylink_create(struct phylink_config *, struct fwnode_handle *, phy_interface_t iface, const struct phylink_mac_ops *mac_ops); void phylink_destroy(struct phylink *); +bool phylink_expects_phy(struct phylink *pl); int phylink_connect_phy(struct phylink *, struct phy_device *); int phylink_of_phy_connect(struct phylink *, struct device_node *, u32 flags); -- cgit v1.2.3 From 86eb94bf8006a85738f0ccf49e3ce894e03922a6 Mon Sep 17 00:00:00 2001 From: Adrien Thierry Date: Wed, 29 Mar 2023 16:54:25 -0400 Subject: scsi: Revert "scsi: ufs: core: Initialize devfreq synchronously" This reverts commit 7dafc3e007918384c8693ff8d70381b5c1e9c247. This patch introduced a regression [1] where hba->pwr_info is used before being initialized, which could create issues in ufshcd_scale_gear(). Revert it until a better solution is found. [1] https://lore.kernel.org/all/CAGaU9a_PMZhqv+YJ0r3w-hJMsR922oxW6Kg59vw+oen-NZ6Otw@mail.gmail.com Signed-off-by: Adrien Thierry Link: https://lore.kernel.org/r/20230329205426.46393-1-athierry@redhat.com Reviewed-by: Stanley Chu Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 47 ++++++++++++++++------------------------------- include/ufs/ufshcd.h | 1 - 2 files changed, 16 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 37e178a9ac47..70b112038792 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -1409,13 +1409,6 @@ static int ufshcd_devfreq_target(struct device *dev, struct ufs_clk_info *clki; unsigned long irq_flags; - /* - * Skip devfreq if UFS initialization is not finished. - * Otherwise ufs could be in a inconsistent state. - */ - if (!smp_load_acquire(&hba->logical_unit_scan_finished)) - return 0; - if (!ufshcd_is_clkscaling_supported(hba)) return -EINVAL; @@ -8399,6 +8392,22 @@ static int ufshcd_add_lus(struct ufs_hba *hba) if (ret) goto out; + /* Initialize devfreq after UFS device is detected */ + if (ufshcd_is_clkscaling_supported(hba)) { + memcpy(&hba->clk_scaling.saved_pwr_info.info, + &hba->pwr_info, + sizeof(struct ufs_pa_layer_attr)); + hba->clk_scaling.saved_pwr_info.is_valid = true; + hba->clk_scaling.is_allowed = true; + + ret = ufshcd_devfreq_init(hba); + if (ret) + goto out; + + hba->clk_scaling.is_enabled = true; + ufshcd_init_clk_scaling_sysfs(hba); + } + ufs_bsg_probe(hba); ufshpb_init(hba); scsi_scan_host(hba->host); @@ -8670,12 +8679,6 @@ out: if (ret) { pm_runtime_put_sync(hba->dev); ufshcd_hba_exit(hba); - } else { - /* - * Make sure that when reader code sees UFS initialization has finished, - * all initialization steps have really been executed. - */ - smp_store_release(&hba->logical_unit_scan_finished, true); } } @@ -10316,30 +10319,12 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) */ ufshcd_set_ufs_dev_active(hba); - /* Initialize devfreq */ - if (ufshcd_is_clkscaling_supported(hba)) { - memcpy(&hba->clk_scaling.saved_pwr_info.info, - &hba->pwr_info, - sizeof(struct ufs_pa_layer_attr)); - hba->clk_scaling.saved_pwr_info.is_valid = true; - hba->clk_scaling.is_allowed = true; - - err = ufshcd_devfreq_init(hba); - if (err) - goto rpm_put_sync; - - hba->clk_scaling.is_enabled = true; - ufshcd_init_clk_scaling_sysfs(hba); - } - async_schedule(ufshcd_async_scan, hba); ufs_sysfs_add_nodes(hba->dev); device_enable_async_suspend(dev); return 0; -rpm_put_sync: - pm_runtime_put_sync(dev); free_tmf_queue: blk_mq_destroy_queue(hba->tmf_queue); blk_put_queue(hba->tmf_queue); diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index 25aab8ec4f86..431c3afb2ce0 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -979,7 +979,6 @@ struct ufs_hba { struct completion *uic_async_done; enum ufshcd_state ufshcd_state; - bool logical_unit_scan_finished; u32 eh_flags; u32 intr_mask; u16 ee_ctrl_mask; -- cgit v1.2.3 From ea65b41807a26495ff2a73dd8b1bab2751940887 Mon Sep 17 00:00:00 2001 From: John Keeping Date: Mon, 27 Mar 2023 18:36:46 +0100 Subject: ftrace: Mark get_lock_parent_ip() __always_inline If the compiler decides not to inline this function then preemption tracing will always show an IP inside the preemption disabling path and never the function actually calling preempt_{enable,disable}. Link: https://lore.kernel.org/linux-trace-kernel/20230327173647.1690849-1-john@metanate.com Cc: Masami Hiramatsu Cc: Mark Rutland Cc: stable@vger.kernel.org Fixes: f904f58263e1d ("sched/debug: Fix preempt_disable_ip recording for preempt_disable()") Signed-off-by: John Keeping Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 366c730beaa3..402fc061de75 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -980,7 +980,7 @@ static inline void __ftrace_enabled_restore(int enabled) #define CALLER_ADDR5 ((unsigned long)ftrace_return_address(5)) #define CALLER_ADDR6 ((unsigned long)ftrace_return_address(6)) -static inline unsigned long get_lock_parent_ip(void) +static __always_inline unsigned long get_lock_parent_ip(void) { unsigned long addr = CALLER_ADDR0; -- cgit v1.2.3 From f82e7ca019dfad3b006fd3b772f7ac569672db55 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 9 Mar 2023 22:13:02 -0500 Subject: tracing: Error if a trace event has an array for a __field() A __field() in the TRACE_EVENT() macro is used to set up the fields of the trace event data. It is for single storage units (word, char, int, pointer, etc) and not for complex structures or arrays. Unfortunately, there's nothing preventing the build from accepting: __field(int, arr[5]); from building. It will turn into a array value. This use to work fine, as the offset and size use to be determined by the macro using the field name, but things have changed and the offset and size are now determined by the type. So the above would only be size 4, and the next field will be located 4 bytes from it (instead of 20). The proper way to declare static arrays is to use the __array() macro. Instead of __field(int, arr[5]) it should be __array(int, arr, 5). Add some macro tricks to the building of a trace event from the TRACE_EVENT() macro such that __field(int, arr[5]) will fail to build. A comment by the failure will explain why the build failed. Link: https://lore.kernel.org/lkml/20230306122549.236561-1-douglas.raillard@arm.com/ Link: https://lore.kernel.org/linux-trace-kernel/20230309221302.642e82d9@gandalf.local.home Reported-by: Douglas RAILLARD Signed-off-by: Steven Rostedt (Google) Acked-by: Masami Hiramatsu (Google) --- include/trace/stages/stage5_get_offsets.h | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/stages/stage5_get_offsets.h b/include/trace/stages/stage5_get_offsets.h index ac5c24d3beeb..e30a13be46ba 100644 --- a/include/trace/stages/stage5_get_offsets.h +++ b/include/trace/stages/stage5_get_offsets.h @@ -9,17 +9,30 @@ #undef __entry #define __entry entry +/* + * Fields should never declare an array: i.e. __field(int, arr[5]) + * If they do, it will cause issues in parsing and possibly corrupt the + * events. To prevent that from happening, test the sizeof() a fictitious + * type called "struct _test_no_array_##item" which will fail if "item" + * contains array elements (like "arr[5]"). + * + * If you hit this, use __array(int, arr, 5) instead. + */ #undef __field -#define __field(type, item) +#define __field(type, item) \ + { (void)sizeof(struct _test_no_array_##item *); } #undef __field_ext -#define __field_ext(type, item, filter_type) +#define __field_ext(type, item, filter_type) \ + { (void)sizeof(struct _test_no_array_##item *); } #undef __field_struct -#define __field_struct(type, item) +#define __field_struct(type, item) \ + { (void)sizeof(struct _test_no_array_##item *); } #undef __field_struct_ext -#define __field_struct_ext(type, item, filter_type) +#define __field_struct_ext(type, item, filter_type) \ + { (void)sizeof(struct _test_no_array_##item *); } #undef __array #define __array(type, item, len) -- cgit v1.2.3 From d564fa1ff19e893e2971d66e5c8f49dc1cdc8ffc Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 9 Jan 2023 15:11:52 +0200 Subject: asm-generic/io.h: suppress endianness warnings for readq() and writeq() Commit c1d55d50139b ("asm-generic/io.h: Fix sparse warnings on big-endian architectures") missed fixing the 64-bit accessors. Arnd explains in the attached link why the casts are necessary, even if __raw_readq() and __raw_writeq() do not take endian-specific types. Link: https://lore.kernel.org/lkml/9105d6fc-880b-4734-857d-e3d30b87ccf6@app.fastmail.com/ Suggested-by: Arnd Bergmann Signed-off-by: Vladimir Oltean Reviewed-by: Jonathan Cameron Signed-off-by: Arnd Bergmann --- include/asm-generic/io.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index 4c44a29b5e8e..d78c3056c98f 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -236,7 +236,7 @@ static inline u64 readq(const volatile void __iomem *addr) log_read_mmio(64, addr, _THIS_IP_, _RET_IP_); __io_br(); - val = __le64_to_cpu(__raw_readq(addr)); + val = __le64_to_cpu((__le64 __force)__raw_readq(addr)); __io_ar(val); log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_); return val; @@ -287,7 +287,7 @@ static inline void writeq(u64 value, volatile void __iomem *addr) { log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); __io_bw(); - __raw_writeq(__cpu_to_le64(value), addr); + __raw_writeq((u64 __force)__cpu_to_le64(value), addr); __io_aw(); log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); } -- cgit v1.2.3 From 05d3855b4d21ef3c2df26be1cbba9d2c68915fcb Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 9 Jan 2023 15:11:53 +0200 Subject: asm-generic/io.h: suppress endianness warnings for relaxed accessors Copy the forced type casts from the normal MMIO accessors to suppress the sparse warnings that point out __raw_readl() returns a native endian word (just like readl()). Signed-off-by: Vladimir Oltean Signed-off-by: Arnd Bergmann --- include/asm-generic/io.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h index d78c3056c98f..587e7e9b9a37 100644 --- a/include/asm-generic/io.h +++ b/include/asm-generic/io.h @@ -319,7 +319,7 @@ static inline u16 readw_relaxed(const volatile void __iomem *addr) u16 val; log_read_mmio(16, addr, _THIS_IP_, _RET_IP_); - val = __le16_to_cpu(__raw_readw(addr)); + val = __le16_to_cpu((__le16 __force)__raw_readw(addr)); log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_); return val; } @@ -332,7 +332,7 @@ static inline u32 readl_relaxed(const volatile void __iomem *addr) u32 val; log_read_mmio(32, addr, _THIS_IP_, _RET_IP_); - val = __le32_to_cpu(__raw_readl(addr)); + val = __le32_to_cpu((__le32 __force)__raw_readl(addr)); log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_); return val; } @@ -345,7 +345,7 @@ static inline u64 readq_relaxed(const volatile void __iomem *addr) u64 val; log_read_mmio(64, addr, _THIS_IP_, _RET_IP_); - val = __le64_to_cpu(__raw_readq(addr)); + val = __le64_to_cpu((__le64 __force)__raw_readq(addr)); log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_); return val; } @@ -366,7 +366,7 @@ static inline void writeb_relaxed(u8 value, volatile void __iomem *addr) static inline void writew_relaxed(u16 value, volatile void __iomem *addr) { log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); - __raw_writew(cpu_to_le16(value), addr); + __raw_writew((u16 __force)cpu_to_le16(value), addr); log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_); } #endif @@ -376,7 +376,7 @@ static inline void writew_relaxed(u16 value, volatile void __iomem *addr) static inline void writel_relaxed(u32 value, volatile void __iomem *addr) { log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); - __raw_writel(__cpu_to_le32(value), addr); + __raw_writel((u32 __force)__cpu_to_le32(value), addr); log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_); } #endif @@ -386,7 +386,7 @@ static inline void writel_relaxed(u32 value, volatile void __iomem *addr) static inline void writeq_relaxed(u64 value, volatile void __iomem *addr) { log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); - __raw_writeq(__cpu_to_le64(value), addr); + __raw_writeq((u64 __force)__cpu_to_le64(value), addr); log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_); } #endif -- cgit v1.2.3 From 656e9007ef5862746cdf7ac16267c8e06e7b0989 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 2 Mar 2023 09:53:31 +0100 Subject: asm-generic: avoid __generic_cmpxchg_local warnings Code that passes a 32-bit constant into cmpxchg() produces a harmless sparse warning because of the truncation in the branch that is not taken: fs/erofs/zdata.c: note: in included file (through /home/arnd/arm-soc/arch/arm/include/asm/cmpxchg.h, /home/arnd/arm-soc/arch/arm/include/asm/atomic.h, /home/arnd/arm-soc/include/linux/atomic.h, ...): include/asm-generic/cmpxchg-local.h:29:33: warning: cast truncates bits from constant value (5f0ecafe becomes fe) include/asm-generic/cmpxchg-local.h:33:34: warning: cast truncates bits from constant value (5f0ecafe becomes cafe) include/asm-generic/cmpxchg-local.h:29:33: warning: cast truncates bits from constant value (5f0ecafe becomes fe) include/asm-generic/cmpxchg-local.h:30:42: warning: cast truncates bits from constant value (5f0edead becomes ad) include/asm-generic/cmpxchg-local.h:33:34: warning: cast truncates bits from constant value (5f0ecafe becomes cafe) include/asm-generic/cmpxchg-local.h:34:44: warning: cast truncates bits from constant value (5f0edead becomes dead) This was reported as a regression to Matt's recent __generic_cmpxchg_local patch, though this patch only added more warnings on top of the ones that were already there. Rewording the truncation to use an explicit bitmask instead of a cast to a smaller type avoids the warning but otherwise leaves the code unchanged. I had another look at why the cast is even needed for atomic_cmpxchg(), and as Matt describes the problem here is that atomic_t contains a signed 'int', but cmpxchg() takes an 'unsigned long' argument, and converting between the two leads to a 64-bit sign-extension of negative 32-bit atomics. I checked the other implementations of arch_cmpxchg() and did not find any others that run into the same problem as __generic_cmpxchg_local(), but it's easy to be on the safe side here and always convert the signed int into an unsigned int when calling arch_cmpxchg(), as this will work even when any of the arch_cmpxchg() implementations run into the same problem. Fixes: 624654152284 ("locking/atomic: cmpxchg: Make __generic_cmpxchg_local compare against zero-extended 'old' value") Reviewed-by: Matt Evans Signed-off-by: Arnd Bergmann --- include/asm-generic/atomic.h | 4 ++-- include/asm-generic/cmpxchg-local.h | 12 ++++++------ include/asm-generic/cmpxchg.h | 6 +++--- 3 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index 04b8be9f1a77..e271d6708c87 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -130,7 +130,7 @@ ATOMIC_OP(xor, ^) #define arch_atomic_read(v) READ_ONCE((v)->counter) #define arch_atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) -#define arch_atomic_xchg(ptr, v) (arch_xchg(&(ptr)->counter, (v))) -#define arch_atomic_cmpxchg(v, old, new) (arch_cmpxchg(&((v)->counter), (old), (new))) +#define arch_atomic_xchg(ptr, v) (arch_xchg(&(ptr)->counter, (u32)(v))) +#define arch_atomic_cmpxchg(v, old, new) (arch_cmpxchg(&((v)->counter), (u32)(old), (u32)(new))) #endif /* __ASM_GENERIC_ATOMIC_H */ diff --git a/include/asm-generic/cmpxchg-local.h b/include/asm-generic/cmpxchg-local.h index c3e7315b7c1d..3df9f59a544e 100644 --- a/include/asm-generic/cmpxchg-local.h +++ b/include/asm-generic/cmpxchg-local.h @@ -26,16 +26,16 @@ static inline unsigned long __generic_cmpxchg_local(volatile void *ptr, raw_local_irq_save(flags); switch (size) { case 1: prev = *(u8 *)ptr; - if (prev == (u8)old) - *(u8 *)ptr = (u8)new; + if (prev == (old & 0xffu)) + *(u8 *)ptr = (new & 0xffu); break; case 2: prev = *(u16 *)ptr; - if (prev == (u16)old) - *(u16 *)ptr = (u16)new; + if (prev == (old & 0xffffu)) + *(u16 *)ptr = (new & 0xffffu); break; case 4: prev = *(u32 *)ptr; - if (prev == (u32)old) - *(u32 *)ptr = (u32)new; + if (prev == (old & 0xffffffffffu)) + *(u32 *)ptr = (new & 0xffffffffu); break; case 8: prev = *(u64 *)ptr; if (prev == old) diff --git a/include/asm-generic/cmpxchg.h b/include/asm-generic/cmpxchg.h index dca4419922a9..848de25fc4bf 100644 --- a/include/asm-generic/cmpxchg.h +++ b/include/asm-generic/cmpxchg.h @@ -32,7 +32,7 @@ unsigned long __generic_xchg(unsigned long x, volatile void *ptr, int size) #else local_irq_save(flags); ret = *(volatile u8 *)ptr; - *(volatile u8 *)ptr = x; + *(volatile u8 *)ptr = (x & 0xffu); local_irq_restore(flags); return ret; #endif /* __xchg_u8 */ @@ -43,7 +43,7 @@ unsigned long __generic_xchg(unsigned long x, volatile void *ptr, int size) #else local_irq_save(flags); ret = *(volatile u16 *)ptr; - *(volatile u16 *)ptr = x; + *(volatile u16 *)ptr = (x & 0xffffu); local_irq_restore(flags); return ret; #endif /* __xchg_u16 */ @@ -54,7 +54,7 @@ unsigned long __generic_xchg(unsigned long x, volatile void *ptr, int size) #else local_irq_save(flags); ret = *(volatile u32 *)ptr; - *(volatile u32 *)ptr = x; + *(volatile u32 *)ptr = (x & 0xffffffffu); local_irq_restore(flags); return ret; #endif /* __xchg_u32 */ -- cgit v1.2.3 From 0a78cf7264d29abeca098eae0b188a10aabc8a32 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 3 Apr 2023 12:49:58 -0700 Subject: raw: Fix NULL deref in raw_get_next(). Dae R. Jeong reported a NULL deref in raw_get_next() [0]. It seems that the repro was running these sequences in parallel so that one thread was iterating on a socket that was being freed in another netns. unshare(0x40060200) r0 = syz_open_procfs(0x0, &(0x7f0000002080)='net/raw\x00') socket$inet_icmp_raw(0x2, 0x3, 0x1) pread64(r0, &(0x7f0000000000)=""/10, 0xa, 0x10000000007f) After commit 0daf07e52709 ("raw: convert raw sockets to RCU"), we use RCU and hlist_nulls_for_each_entry() to iterate over SOCK_RAW sockets. However, we should use spinlock for slow paths to avoid the NULL deref. Also, SOCK_RAW does not use SLAB_TYPESAFE_BY_RCU, and the slab object is not reused during iteration in the grace period. In fact, the lockless readers do not check the nulls marker with get_nulls_value(). So, SOCK_RAW should use hlist instead of hlist_nulls. Instead of adding an unnecessary barrier by sk_nulls_for_each_rcu(), let's convert hlist_nulls to hlist and use sk_for_each_rcu() for fast paths and sk_for_each() and spinlock for /proc/net/raw. [0]: general protection fault, probably for non-canonical address 0xdffffc0000000005: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000028-0x000000000000002f] CPU: 2 PID: 20952 Comm: syz-executor.0 Not tainted 6.2.0-g048ec869bafd-dirty #7 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 RIP: 0010:read_pnet include/net/net_namespace.h:383 [inline] RIP: 0010:sock_net include/net/sock.h:649 [inline] RIP: 0010:raw_get_next net/ipv4/raw.c:974 [inline] RIP: 0010:raw_get_idx net/ipv4/raw.c:986 [inline] RIP: 0010:raw_seq_start+0x431/0x800 net/ipv4/raw.c:995 Code: ef e8 33 3d 94 f7 49 8b 6d 00 4c 89 ef e8 b7 65 5f f7 49 89 ed 49 83 c5 98 0f 84 9a 00 00 00 48 83 c5 c8 48 89 e8 48 c1 e8 03 <42> 80 3c 30 00 74 08 48 89 ef e8 00 3d 94 f7 4c 8b 7d 00 48 89 ef RSP: 0018:ffffc9001154f9b0 EFLAGS: 00010206 RAX: 0000000000000005 RBX: 1ffff1100302c8fd RCX: 0000000000000000 RDX: 0000000000000028 RSI: ffffc9001154f988 RDI: ffffc9000f77a338 RBP: 0000000000000029 R08: ffffffff8a50ffb4 R09: fffffbfff24b6bd9 R10: fffffbfff24b6bd9 R11: 0000000000000000 R12: ffff88801db73b78 R13: fffffffffffffff9 R14: dffffc0000000000 R15: 0000000000000030 FS: 00007f843ae8e700(0000) GS:ffff888063700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055bb9614b35f CR3: 000000003c672000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: seq_read_iter+0x4c6/0x10f0 fs/seq_file.c:225 seq_read+0x224/0x320 fs/seq_file.c:162 pde_read fs/proc/inode.c:316 [inline] proc_reg_read+0x23f/0x330 fs/proc/inode.c:328 vfs_read+0x31e/0xd30 fs/read_write.c:468 ksys_pread64 fs/read_write.c:665 [inline] __do_sys_pread64 fs/read_write.c:675 [inline] __se_sys_pread64 fs/read_write.c:672 [inline] __x64_sys_pread64+0x1e9/0x280 fs/read_write.c:672 do_syscall_x64 arch/x86/entry/common.c:51 [inline] do_syscall_64+0x4e/0xa0 arch/x86/entry/common.c:82 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x478d29 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f843ae8dbe8 EFLAGS: 00000246 ORIG_RAX: 0000000000000011 RAX: ffffffffffffffda RBX: 0000000000791408 RCX: 0000000000478d29 RDX: 000000000000000a RSI: 0000000020000000 RDI: 0000000000000003 RBP: 00000000f477909a R08: 0000000000000000 R09: 0000000000000000 R10: 000010000000007f R11: 0000000000000246 R12: 0000000000791740 R13: 0000000000791414 R14: 0000000000791408 R15: 00007ffc2eb48a50 Modules linked in: ---[ end trace 0000000000000000 ]--- RIP: 0010:read_pnet include/net/net_namespace.h:383 [inline] RIP: 0010:sock_net include/net/sock.h:649 [inline] RIP: 0010:raw_get_next net/ipv4/raw.c:974 [inline] RIP: 0010:raw_get_idx net/ipv4/raw.c:986 [inline] RIP: 0010:raw_seq_start+0x431/0x800 net/ipv4/raw.c:995 Code: ef e8 33 3d 94 f7 49 8b 6d 00 4c 89 ef e8 b7 65 5f f7 49 89 ed 49 83 c5 98 0f 84 9a 00 00 00 48 83 c5 c8 48 89 e8 48 c1 e8 03 <42> 80 3c 30 00 74 08 48 89 ef e8 00 3d 94 f7 4c 8b 7d 00 48 89 ef RSP: 0018:ffffc9001154f9b0 EFLAGS: 00010206 RAX: 0000000000000005 RBX: 1ffff1100302c8fd RCX: 0000000000000000 RDX: 0000000000000028 RSI: ffffc9001154f988 RDI: ffffc9000f77a338 RBP: 0000000000000029 R08: ffffffff8a50ffb4 R09: fffffbfff24b6bd9 R10: fffffbfff24b6bd9 R11: 0000000000000000 R12: ffff88801db73b78 R13: fffffffffffffff9 R14: dffffc0000000000 R15: 0000000000000030 FS: 00007f843ae8e700(0000) GS:ffff888063700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f92ff166000 CR3: 000000003c672000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Fixes: 0daf07e52709 ("raw: convert raw sockets to RCU") Reported-by: syzbot Reported-by: Dae R. Jeong Link: https://lore.kernel.org/netdev/ZCA2mGV_cmq7lIfV@dragonet/ Signed-off-by: Kuniyuki Iwashima Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- include/net/raw.h | 4 ++-- net/ipv4/raw.c | 36 +++++++++++++++++++----------------- net/ipv4/raw_diag.c | 10 ++++------ net/ipv6/raw.c | 10 ++++------ 4 files changed, 29 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/net/raw.h b/include/net/raw.h index 2c004c20ed99..3af5289fdead 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -37,7 +37,7 @@ int raw_rcv(struct sock *, struct sk_buff *); struct raw_hashinfo { spinlock_t lock; - struct hlist_nulls_head ht[RAW_HTABLE_SIZE] ____cacheline_aligned; + struct hlist_head ht[RAW_HTABLE_SIZE] ____cacheline_aligned; }; static inline u32 raw_hashfunc(const struct net *net, u32 proto) @@ -51,7 +51,7 @@ static inline void raw_hashinfo_init(struct raw_hashinfo *hashinfo) spin_lock_init(&hashinfo->lock); for (i = 0; i < RAW_HTABLE_SIZE; i++) - INIT_HLIST_NULLS_HEAD(&hashinfo->ht[i], i); + INIT_HLIST_HEAD(&hashinfo->ht[i]); } #ifdef CONFIG_PROC_FS diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 94df935ee0c5..8088a5011e7d 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -91,12 +91,12 @@ EXPORT_SYMBOL_GPL(raw_v4_hashinfo); int raw_hash_sk(struct sock *sk) { struct raw_hashinfo *h = sk->sk_prot->h.raw_hash; - struct hlist_nulls_head *hlist; + struct hlist_head *hlist; hlist = &h->ht[raw_hashfunc(sock_net(sk), inet_sk(sk)->inet_num)]; spin_lock(&h->lock); - __sk_nulls_add_node_rcu(sk, hlist); + sk_add_node_rcu(sk, hlist); sock_set_flag(sk, SOCK_RCU_FREE); spin_unlock(&h->lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); @@ -110,7 +110,7 @@ void raw_unhash_sk(struct sock *sk) struct raw_hashinfo *h = sk->sk_prot->h.raw_hash; spin_lock(&h->lock); - if (__sk_nulls_del_node_init_rcu(sk)) + if (sk_del_node_init_rcu(sk)) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); spin_unlock(&h->lock); } @@ -163,16 +163,15 @@ static int icmp_filter(const struct sock *sk, const struct sk_buff *skb) static int raw_v4_input(struct net *net, struct sk_buff *skb, const struct iphdr *iph, int hash) { - struct hlist_nulls_head *hlist; - struct hlist_nulls_node *hnode; int sdif = inet_sdif(skb); + struct hlist_head *hlist; int dif = inet_iif(skb); int delivered = 0; struct sock *sk; hlist = &raw_v4_hashinfo.ht[hash]; rcu_read_lock(); - sk_nulls_for_each(sk, hnode, hlist) { + sk_for_each_rcu(sk, hlist) { if (!raw_v4_match(net, sk, iph->protocol, iph->saddr, iph->daddr, dif, sdif)) continue; @@ -264,10 +263,9 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info) void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info) { struct net *net = dev_net(skb->dev); - struct hlist_nulls_head *hlist; - struct hlist_nulls_node *hnode; int dif = skb->dev->ifindex; int sdif = inet_sdif(skb); + struct hlist_head *hlist; const struct iphdr *iph; struct sock *sk; int hash; @@ -276,7 +274,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info) hlist = &raw_v4_hashinfo.ht[hash]; rcu_read_lock(); - sk_nulls_for_each(sk, hnode, hlist) { + sk_for_each_rcu(sk, hlist) { iph = (const struct iphdr *)skb->data; if (!raw_v4_match(net, sk, iph->protocol, iph->daddr, iph->saddr, dif, sdif)) @@ -950,14 +948,13 @@ static struct sock *raw_get_first(struct seq_file *seq, int bucket) { struct raw_hashinfo *h = pde_data(file_inode(seq->file)); struct raw_iter_state *state = raw_seq_private(seq); - struct hlist_nulls_head *hlist; - struct hlist_nulls_node *hnode; + struct hlist_head *hlist; struct sock *sk; for (state->bucket = bucket; state->bucket < RAW_HTABLE_SIZE; ++state->bucket) { hlist = &h->ht[state->bucket]; - sk_nulls_for_each(sk, hnode, hlist) { + sk_for_each(sk, hlist) { if (sock_net(sk) == seq_file_net(seq)) return sk; } @@ -970,7 +967,7 @@ static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk) struct raw_iter_state *state = raw_seq_private(seq); do { - sk = sk_nulls_next(sk); + sk = sk_next(sk); } while (sk && sock_net(sk) != seq_file_net(seq)); if (!sk) @@ -989,9 +986,12 @@ static struct sock *raw_get_idx(struct seq_file *seq, loff_t pos) } void *raw_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU) + __acquires(&h->lock) { - rcu_read_lock(); + struct raw_hashinfo *h = pde_data(file_inode(seq->file)); + + spin_lock(&h->lock); + return *pos ? raw_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } EXPORT_SYMBOL_GPL(raw_seq_start); @@ -1010,9 +1010,11 @@ void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos) EXPORT_SYMBOL_GPL(raw_seq_next); void raw_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) + __releases(&h->lock) { - rcu_read_unlock(); + struct raw_hashinfo *h = pde_data(file_inode(seq->file)); + + spin_unlock(&h->lock); } EXPORT_SYMBOL_GPL(raw_seq_stop); diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c index 999321834b94..da3591a66a16 100644 --- a/net/ipv4/raw_diag.c +++ b/net/ipv4/raw_diag.c @@ -57,8 +57,7 @@ static bool raw_lookup(struct net *net, struct sock *sk, static struct sock *raw_sock_get(struct net *net, const struct inet_diag_req_v2 *r) { struct raw_hashinfo *hashinfo = raw_get_hashinfo(r); - struct hlist_nulls_head *hlist; - struct hlist_nulls_node *hnode; + struct hlist_head *hlist; struct sock *sk; int slot; @@ -68,7 +67,7 @@ static struct sock *raw_sock_get(struct net *net, const struct inet_diag_req_v2 rcu_read_lock(); for (slot = 0; slot < RAW_HTABLE_SIZE; slot++) { hlist = &hashinfo->ht[slot]; - sk_nulls_for_each(sk, hnode, hlist) { + sk_for_each_rcu(sk, hlist) { if (raw_lookup(net, sk, r)) { /* * Grab it and keep until we fill @@ -142,9 +141,8 @@ static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, struct raw_hashinfo *hashinfo = raw_get_hashinfo(r); struct net *net = sock_net(skb->sk); struct inet_diag_dump_data *cb_data; - struct hlist_nulls_head *hlist; - struct hlist_nulls_node *hnode; int num, s_num, slot, s_slot; + struct hlist_head *hlist; struct sock *sk = NULL; struct nlattr *bc; @@ -161,7 +159,7 @@ static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, num = 0; hlist = &hashinfo->ht[slot]; - sk_nulls_for_each(sk, hnode, hlist) { + sk_for_each_rcu(sk, hlist) { struct inet_sock *inet = inet_sk(sk); if (!net_eq(sock_net(sk), net)) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index bac9ba747bde..a327aa481df4 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -141,10 +141,9 @@ EXPORT_SYMBOL(rawv6_mh_filter_unregister); static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) { struct net *net = dev_net(skb->dev); - struct hlist_nulls_head *hlist; - struct hlist_nulls_node *hnode; const struct in6_addr *saddr; const struct in6_addr *daddr; + struct hlist_head *hlist; struct sock *sk; bool delivered = false; __u8 hash; @@ -155,7 +154,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) hash = raw_hashfunc(net, nexthdr); hlist = &raw_v6_hashinfo.ht[hash]; rcu_read_lock(); - sk_nulls_for_each(sk, hnode, hlist) { + sk_for_each_rcu(sk, hlist) { int filtered; if (!raw_v6_match(net, sk, nexthdr, daddr, saddr, @@ -333,15 +332,14 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr, u8 type, u8 code, int inner_offset, __be32 info) { struct net *net = dev_net(skb->dev); - struct hlist_nulls_head *hlist; - struct hlist_nulls_node *hnode; + struct hlist_head *hlist; struct sock *sk; int hash; hash = raw_hashfunc(net, nexthdr); hlist = &raw_v6_hashinfo.ht[hash]; rcu_read_lock(); - sk_nulls_for_each(sk, hnode, hlist) { + sk_for_each_rcu(sk, hlist) { /* Note: ipv6_hdr(skb) != skb->data */ const struct ipv6hdr *ip6h = (const struct ipv6hdr *)skb->data; -- cgit v1.2.3 From 78dfc9d1d1abb9e400386fa9c5724a8f7d75e3b9 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 4 Apr 2023 13:02:46 +0200 Subject: ACPI: video: Add auto_detect arg to __acpi_video_get_backlight_type() Allow callers of __acpi_video_get_backlight_type() to pass a pointer to a bool which will get set to false if the backlight-type comes from the cmdline or a DMI quirk and set to true if auto-detection was used. And make __acpi_video_get_backlight_type() non static so that it can be called directly outside of video_detect.c . While at it turn the acpi_video_get_backlight_type() and acpi_video_backlight_use_native() wrappers into static inline functions in include/acpi/video.h, so that we need to export one less symbol. Fixes: 5aa9d943e9b6 ("ACPI: video: Don't enable fallback path for creating ACPI backlight by default") Cc: All applicable Reviewed-by: Mario Limonciello Signed-off-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/video_detect.c | 21 ++++++++------------- include/acpi/video.h | 15 +++++++++++++-- 2 files changed, 21 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index fd7cbce8076e..f7c218dd8742 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -782,7 +782,7 @@ static bool prefer_native_over_acpi_video(void) * Determine which type of backlight interface to use on this system, * First check cmdline, then dmi quirks, then do autodetect. */ -static enum acpi_backlight_type __acpi_video_get_backlight_type(bool native) +enum acpi_backlight_type __acpi_video_get_backlight_type(bool native, bool *auto_detect) { static DEFINE_MUTEX(init_mutex); static bool nvidia_wmi_ec_present; @@ -807,6 +807,9 @@ static enum acpi_backlight_type __acpi_video_get_backlight_type(bool native) native_available = true; mutex_unlock(&init_mutex); + if (auto_detect) + *auto_detect = false; + /* * The below heuristics / detection steps are in order of descending * presedence. The commandline takes presedence over anything else. @@ -818,6 +821,9 @@ static enum acpi_backlight_type __acpi_video_get_backlight_type(bool native) if (acpi_backlight_dmi != acpi_backlight_undef) return acpi_backlight_dmi; + if (auto_detect) + *auto_detect = true; + /* Special cases such as nvidia_wmi_ec and apple gmux. */ if (nvidia_wmi_ec_present) return acpi_backlight_nvidia_wmi_ec; @@ -837,15 +843,4 @@ static enum acpi_backlight_type __acpi_video_get_backlight_type(bool native) /* No ACPI video/native (old hw), use vendor specific fw methods. */ return acpi_backlight_vendor; } - -enum acpi_backlight_type acpi_video_get_backlight_type(void) -{ - return __acpi_video_get_backlight_type(false); -} -EXPORT_SYMBOL(acpi_video_get_backlight_type); - -bool acpi_video_backlight_use_native(void) -{ - return __acpi_video_get_backlight_type(true) == acpi_backlight_native; -} -EXPORT_SYMBOL(acpi_video_backlight_use_native); +EXPORT_SYMBOL(__acpi_video_get_backlight_type); diff --git a/include/acpi/video.h b/include/acpi/video.h index 8ed9bec03e53..ff5a8da5d883 100644 --- a/include/acpi/video.h +++ b/include/acpi/video.h @@ -59,8 +59,6 @@ extern void acpi_video_unregister(void); extern void acpi_video_register_backlight(void); extern int acpi_video_get_edid(struct acpi_device *device, int type, int device_id, void **edid); -extern enum acpi_backlight_type acpi_video_get_backlight_type(void); -extern bool acpi_video_backlight_use_native(void); /* * Note: The value returned by acpi_video_handles_brightness_key_presses() * may change over time and should not be cached. @@ -69,6 +67,19 @@ extern bool acpi_video_handles_brightness_key_presses(void); extern int acpi_video_get_levels(struct acpi_device *device, struct acpi_video_device_brightness **dev_br, int *pmax_level); + +extern enum acpi_backlight_type __acpi_video_get_backlight_type(bool native, + bool *auto_detect); + +static inline enum acpi_backlight_type acpi_video_get_backlight_type(void) +{ + return __acpi_video_get_backlight_type(false, NULL); +} + +static inline bool acpi_video_backlight_use_native(void) +{ + return __acpi_video_get_backlight_type(true, NULL) == acpi_backlight_native; +} #else static inline void acpi_video_report_nolcd(void) { return; }; static inline int acpi_video_register(void) { return -ENODEV; } -- cgit v1.2.3 From 3dd4432549415f3c65dd52d5c687629efbf4ece1 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Mon, 27 Feb 2023 09:36:07 -0800 Subject: mm: enable maple tree RCU mode by default Use the maple tree in RCU mode for VMA tracking. The maple tree tracks the stack and is able to update the pivot (lower/upper boundary) in-place to allow the page fault handler to write to the tree while holding just the mmap read lock. This is safe as the writes to the stack have a guard VMA which ensures there will always be a NULL in the direction of the growth and thus will only update a pivot. It is possible, but not recommended, to have VMAs that grow up/down without guard VMAs. syzbot has constructed a testcase which sets up a VMA to grow and consume the empty space. Overwriting the entire NULL entry causes the tree to be altered in a way that is not safe for concurrent readers; the readers may see a node being rewritten or one that does not match the maple state they are using. Enabling RCU mode allows the concurrent readers to see a stable node and will return the expected result. [Liam.Howlett@Oracle.com: we don't need to free the nodes with RCU[ Link: https://lore.kernel.org/linux-mm/000000000000b0a65805f663ace6@google.com/ Link: https://lkml.kernel.org/r/20230227173632.3292573-9-surenb@google.com Fixes: d4af56c5c7c6 ("mm: start tracking VMAs with maple tree") Signed-off-by: Liam R. Howlett Signed-off-by: Suren Baghdasaryan Reported-by: syzbot+8d95422d3537159ca390@syzkaller.appspotmail.com Cc: Signed-off-by: Andrew Morton --- include/linux/mm_types.h | 3 ++- kernel/fork.c | 3 +++ mm/mmap.c | 3 ++- 3 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 0722859c3647..a57e6ae78e65 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -774,7 +774,8 @@ struct mm_struct { unsigned long cpu_bitmap[]; }; -#define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN) +#define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN | \ + MT_FLAGS_USE_RCU) extern struct mm_struct init_mm; /* Pointer magic because the dynamic array size confuses some compilers. */ diff --git a/kernel/fork.c b/kernel/fork.c index c0257cbee093..0c92f224c68c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -617,6 +617,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, if (retval) goto out; + mt_clear_in_rcu(vmi.mas.tree); for_each_vma(old_vmi, mpnt) { struct file *file; @@ -700,6 +701,8 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, retval = arch_dup_mmap(oldmm, mm); loop_out: vma_iter_free(&vmi); + if (!retval) + mt_set_in_rcu(vmi.mas.tree); out: mmap_write_unlock(mm); flush_tlb_mm(oldmm); diff --git a/mm/mmap.c b/mm/mmap.c index ad499f7b767f..ff68a67a2a7c 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2277,7 +2277,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, int count = 0; int error = -ENOMEM; MA_STATE(mas_detach, &mt_detach, 0, 0); - mt_init_flags(&mt_detach, MT_FLAGS_LOCK_EXTERN); + mt_init_flags(&mt_detach, vmi->mas.tree->ma_flags & MT_FLAGS_LOCK_MASK); mt_set_external_lock(&mt_detach, &mm->mmap_lock); /* @@ -3037,6 +3037,7 @@ void exit_mmap(struct mm_struct *mm) */ set_bit(MMF_OOM_SKIP, &mm->flags); mmap_write_lock(mm); + mt_clear_in_rcu(&mm->mm_mt); free_pgtables(&tlb, &mm->mm_mt, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING); tlb_finish_mmu(&tlb); -- cgit v1.2.3