From 2c223f7239f376a90d71903ec474ba887cf21d94 Mon Sep 17 00:00:00 2001 From: Oreoluwa Babatunde Date: Wed, 6 Aug 2025 10:24:21 -0700 Subject: of: reserved_mem: Restructure call site for dma_contiguous_early_fixup()

Restructure the call site for dma_contiguous_early_fixup() to where the reserved_mem nodes are being parsed from the DT so that dma_mmu_remap[] is populated before dma_contiguous_remap() is called.

Fixes: 8a6e02d0c00e ("of: reserved_mem: Restructure how the reserved memory regions are processed") Signed-off-by: Oreoluwa Babatunde Tested-by: William Zhang Signed-off-by: Marek Szyprowski Link: https://lore.kernel.org/r/20250806172421.2748302-1-oreoluwa.babatunde@oss.qualcomm.com --- include/linux/dma-map-ops.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux')

diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index f48e5fb88bd5..332b80c42b6f 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -153,6 +153,9 @@ static inline void dma_free_contiguous(struct device *dev, struct page *page, { __free_pages(page, get_order(size)); } +static inline void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) +{ +} #endif /* CONFIG_DMA_CMA*/ #ifdef CONFIG_DMA_DECLARE_COHERENT -- cgit v1.2.3

From 5634c8cb298a7146b4e38873473e280b50e27a2c Mon Sep 17 00:00:00 2001 From: Nitin Gote Date: Fri, 18 Jul 2025 16:20:51 +0530 Subject: iosys-map: Fix undefined behavior in iosys_map_clear()

The current iosys_map_clear() implementation reads the potentially uninitialized 'is_iomem' boolean field to decide which union member to clear. This causes undefined behavior when called on uninitialized structures, as 'is_iomem' may contain garbage values like 0xFF.

UBSAN detects this as: UBSAN: invalid-load in include/linux/iosys-map.h:267 load of value 255 is not a valid value for type '_Bool'

Fix by unconditionally clearing the entire structure with memset(), eliminating the need to read uninitialized data and ensuring all fields are set to known good values.

Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/14639 Fixes: 01fd30da0474 ("dma-buf: Add struct dma-buf-map for storing struct dma_buf.vaddr_ptr") Signed-off-by: Nitin Gote Reviewed-by: Andi Shyti Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://lore.kernel.org/r/20250718105051.2709487-1-nitin.r.gote@intel.com --- include/linux/iosys-map.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux')

diff --git a/include/linux/iosys-map.h b/include/linux/iosys-map.h index 4696abfd311c..3e85afe794c0 100644 --- a/include/linux/iosys-map.h +++ b/include/linux/iosys-map.h @@ -264,12 +264,7 @@ static inline bool iosys_map_is_set(const struct iosys_map *map) */ static inline void iosys_map_clear(struct iosys_map *map) { - if (map->is_iomem) { - map->vaddr_iomem = NULL; - map->is_iomem = false; - } else { - map->vaddr = NULL; - } + memset(map, 0, sizeof(*map)); } /** -- cgit v1.2.3

From 9528d32873b38281ae105f2f5799e79ae9d086c2 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 11 Aug 2025 10:27:45 +0200 Subject: kcov, usb: Don't disable interrupts in kcov_remote_start_usb_softirq()

kcov_remote_start_usb_softirq() is invoked at the beginning of an URB's completion callback. HCDs marked HCD_BH will invoke this function from the softirq and in_serving_softirq() will detect this properly. Root-HUB (RH) requests will not be delayed to softirq but complete immediately in IRQ context.
This will confuse kcov because in_serving_softirq() will report true if the softirq is served after the hardirq and if the softirq got interrupted by the hardirq in which it currently runs. This was addressed by simply disabling interrupts in kcov_remote_start_usb_softirq(), which avoided the interruption by the RH while a regular completion callback was invoked. This not only changes the behaviour while kcov is enabled but also breaks PREEMPT_RT because now sleeping locks can no longer be acquired.

Revert the previous fix. Address the issue by invoking kcov_remote_start_usb() only if the context is just "serving softirqs", which is identified by checking that in_serving_softirq() is true and in_hardirq() is false.

Fixes: f85d39dd7ed89 ("kcov, usb: disable interrupts in kcov_remote_start_usb_softirq") Cc: stable Reported-by: Yunseong Kim Closes: https://lore.kernel.org/all/20250725201400.1078395-2-ysk@kzalloc.com/ Tested-by: Yunseong Kim Signed-off-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/r/20250811082745.ycJqBXMs@linutronix.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 12 +++++------- include/linux/kcov.h | 47 +++++++++-------------------------------------- 2 files changed, 14 insertions(+), 45 deletions(-) (limited to 'include/linux')

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index c4a1875b5d3d..6270fbb5c699 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1636,7 +1636,6 @@ static void __usb_hcd_giveback_urb(struct urb *urb) struct usb_hcd *hcd = bus_to_hcd(urb->dev->bus); struct usb_anchor *anchor = urb->anchor; int status = urb->unlinked; - unsigned long flags; urb->hcpriv = NULL; if (unlikely((urb->transfer_flags & URB_SHORT_NOT_OK) && @@ -1654,14 +1653,13 @@ static void __usb_hcd_giveback_urb(struct urb *urb) /* pass ownership to the completion handler */ urb->status = status; /* - * Only collect coverage in the softirq context and disable interrupts - * to avoid scenarios with nested remote coverage collection sections - * that KCOV does not support. - * See the comment next to kcov_remote_start_usb_softirq() for details. + * This function can be called in task context inside another remote + * coverage collection section, but kcov doesn't support that kind of + * recursion yet. Only collect coverage in softirq context for now. */ - flags = kcov_remote_start_usb_softirq((u64)urb->dev->bus->busnum); + kcov_remote_start_usb_softirq((u64)urb->dev->bus->busnum); urb->complete(urb); - kcov_remote_stop_softirq(flags); + kcov_remote_stop_softirq(); usb_anchor_resume_wakeups(anchor); atomic_dec(&urb->use_count);

diff --git a/include/linux/kcov.h b/include/linux/kcov.h index 75a2fb8b16c3..0143358874b0 100644 --- a/include/linux/kcov.h +++ b/include/linux/kcov.h @@ -57,47 +57,21 @@ static inline void kcov_remote_start_usb(u64 id) /* * The softirq flavor of kcov_remote_*() functions is introduced as a temporary - * workaround for KCOV's lack of nested remote coverage sections support. - * - * Adding support is tracked in https://bugzilla.kernel.org/show_bug.cgi?id=210337. - * - * kcov_remote_start_usb_softirq(): - * - * 1. Only collects coverage when called in the softirq context. This allows - * avoiding nested remote coverage collection sections in the task context. - * For example, USB/IP calls usb_hcd_giveback_urb() in the task context - * within an existing remote coverage collection section.
Thus, KCOV should - * not attempt to start collecting coverage within the coverage collection - * section in __usb_hcd_giveback_urb() in this case. - * - * 2. Disables interrupts for the duration of the coverage collection section. - * This allows avoiding nested remote coverage collection sections in the - * softirq context (a softirq might occur during the execution of a work in - * the BH workqueue, which runs with in_serving_softirq() > 0). - * For example, usb_giveback_urb_bh() runs in the BH workqueue with - * interrupts enabled, so __usb_hcd_giveback_urb() might be interrupted in - * the middle of its remote coverage collection section, and the interrupt - * handler might invoke __usb_hcd_giveback_urb() again. + * work around for kcov's lack of nested remote coverage sections support in + * task context. Adding support for nested sections is tracked in: + * https://bugzilla.kernel.org/show_bug.cgi?id=210337 */ -static inline unsigned long kcov_remote_start_usb_softirq(u64 id) +static inline void kcov_remote_start_usb_softirq(u64 id) { - unsigned long flags = 0; - - if (in_serving_softirq()) { - local_irq_save(flags); + if (in_serving_softirq() && !in_hardirq()) kcov_remote_start_usb(id); - } - - return flags; } -static inline void kcov_remote_stop_softirq(unsigned long flags) +static inline void kcov_remote_stop_softirq(void) { - if (in_serving_softirq()) { + if (in_serving_softirq() && !in_hardirq()) kcov_remote_stop(); - local_irq_restore(flags); - } } #ifdef CONFIG_64BIT @@ -131,11 +105,8 @@ static inline u64 kcov_common_handle(void) } static inline void kcov_remote_start_common(u64 id) {} static inline void kcov_remote_start_usb(u64 id) {} -static inline unsigned long kcov_remote_start_usb_softirq(u64 id) -{ - return 0; -} -static inline void kcov_remote_stop_softirq(unsigned long flags) {} +static inline void kcov_remote_start_usb_softirq(u64 id) {} +static inline void kcov_remote_stop_softirq(void) {} #endif /* CONFIG_KCOV */ #endif /* _LINUX_KCOV_H */ -- cgit v1.2.3 From a032fe30cf09b6723ab61a05aee057311b00f9e1 Mon Sep 17 00:00:00 2001 From: Dongcheng Yan Date: Fri, 25 Apr 2025 18:43:30 +0800 Subject: platform/x86: int3472: add hpd pin support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Typically HDMI to MIPI CSI-2 bridges have a pin to signal image data is being received. On the host side this is wired to a GPIO for polling or interrupts. This includes the Lontium HDMI to MIPI CSI-2 bridges lt6911uxe and lt6911uxc. The GPIO "hpd" is used already by other HDMI to CSI-2 bridges, use it here as well. 
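For illustration, a consumer driver would request this pin by the "hpd" con_id through the standard gpiod API. A minimal sketch, assuming a hypothetical probe path (only the "hpd" con_id and its GPIO_ACTIVE_HIGH polarity come from this patch):

#include <linux/gpio/consumer.h>

/* Hedged sketch: read the hotplug-detect pin that INT3472 now maps. */
static int bridge_read_hpd(struct device *dev)
{
	struct gpio_desc *hpd;

	hpd = devm_gpiod_get(dev, "hpd", GPIOD_IN);
	if (IS_ERR(hpd))
		return PTR_ERR(hpd);

	/* 1 = bridge is receiving video, 0 = no source connected */
	return gpiod_get_value_cansleep(hpd);
}

In practice a bridge driver such as lt6911uxe would typically convert the descriptor to an interrupt with gpiod_to_irq() rather than poll it.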
Signed-off-by: Dongcheng Yan Reviewed-by: Sakari Ailus Acked-by: Ilpo Järvinen Reviewed-by: Hans de Goede Reviewed-by: Andy Shevchenko Fixes: 20244cbafbd6 ("media: i2c: change lt6911uxe irq_gpio name to "hpd"") Cc: stable@vger.kernel.org Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- drivers/platform/x86/intel/int3472/discrete.c | 6 ++++++ include/linux/platform_data/x86/int3472.h | 1 + 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/drivers/platform/x86/intel/int3472/discrete.c b/drivers/platform/x86/intel/int3472/discrete.c index 4c0aed6e626f..bdfb8a800c54 100644 --- a/drivers/platform/x86/intel/int3472/discrete.c +++ b/drivers/platform/x86/intel/int3472/discrete.c @@ -193,6 +193,10 @@ static void int3472_get_con_id_and_polarity(struct int3472_discrete_device *int3 *con_id = "privacy-led"; *gpio_flags = GPIO_ACTIVE_HIGH; break; + case INT3472_GPIO_TYPE_HOTPLUG_DETECT: + *con_id = "hpd"; + *gpio_flags = GPIO_ACTIVE_HIGH; + break; case INT3472_GPIO_TYPE_POWER_ENABLE: *con_id = "avdd"; *gpio_flags = GPIO_ACTIVE_HIGH; @@ -223,6 +227,7 @@ static void int3472_get_con_id_and_polarity(struct int3472_discrete_device *int3 * 0x0b Power enable * 0x0c Clock enable * 0x0d Privacy LED + * 0x13 Hotplug detect * * There are some known platform specific quirks where that does not quite * hold up; for example where a pin with type 0x01 (Power down) is mapped to @@ -292,6 +297,7 @@ static int skl_int3472_handle_gpio_resources(struct acpi_resource *ares, switch (type) { case INT3472_GPIO_TYPE_RESET: case INT3472_GPIO_TYPE_POWERDOWN: + case INT3472_GPIO_TYPE_HOTPLUG_DETECT: ret = skl_int3472_map_gpio_to_sensor(int3472, agpio, con_id, gpio_flags); if (ret) err_msg = "Failed to map GPIO pin to sensor\n"; diff --git a/include/linux/platform_data/x86/int3472.h b/include/linux/platform_data/x86/int3472.h index 78276a11c48d..1571e9157fa5 100644 --- a/include/linux/platform_data/x86/int3472.h +++ b/include/linux/platform_data/x86/int3472.h @@ -27,6 +27,7 @@ #define INT3472_GPIO_TYPE_CLK_ENABLE 0x0c #define INT3472_GPIO_TYPE_PRIVACY_LED 0x0d #define INT3472_GPIO_TYPE_HANDSHAKE 0x12 +#define INT3472_GPIO_TYPE_HOTPLUG_DETECT 0x13 #define INT3472_PDEV_MAX_NAME_LEN 23 #define INT3472_MAX_SENSOR_GPIOS 3 -- cgit v1.2.3 From 8ea815399c3fcce1889bd951fec25b5b9a3979c1 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 14 Apr 2025 16:41:07 +0200 Subject: compiler: remove __ADDRESSABLE_ASM{_STR,}() again __ADDRESSABLE_ASM_STR() is where the necessary stringification happens. As long as "sym" doesn't contain any odd characters, no quoting is required for its use with .quad / .long. In fact the quotation gets in the way with gas 2.25; it's only from 2.26 onwards that quoted symbols are half-way properly supported. However, assembly being different from C anyway, drop __ADDRESSABLE_ASM_STR() and its helper macro altogether. A simple .global directive will suffice to get the symbol "declared", i.e. into the symbol table. While there also stop open-coding STATIC_CALL_TRAMP() and STATIC_CALL_KEY(). 
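For the built-in (non-module) case the result is the same symbols the old code spelled out by hand; a rough sketch of what __HYPERCALL now expands to (operands elided, not actual compiler output):

/*
 * STATIC_CALL_KEY(xen_hypercall)   -> __SCK__xen_hypercall
 * STATIC_CALL_TRAMP(xen_hypercall) -> __SCT__xen_hypercall
 */
asm volatile(".global __SCK__xen_hypercall;\n\t"
	     "call __SCT__xen_hypercall"
	     : /* outputs, inputs and clobbers elided */);

That is, the key symbol is merely declared global so it reaches the symbol table; no .discard.addressable entry is emitted anymore.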
Fixes: 0ef8047b737d ("x86/static-call: provide a way to do very early static-call updates") Signed-off-by: Jan Beulich Acked-by: Josh Poimboeuf Cc: stable@vger.kernel.org Signed-off-by: Juergen Gross Message-ID: <609d2c74-de13-4fae-ab1a-1ec44afb948d@suse.com> --- arch/x86/include/asm/xen/hypercall.h | 5 +++-- include/linux/compiler.h | 8 -------- 2 files changed, 3 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 59a62c3780a2..a16d4631547c 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -94,12 +94,13 @@ DECLARE_STATIC_CALL(xen_hypercall, xen_hypercall_func); #ifdef MODULE #define __ADDRESSABLE_xen_hypercall #else -#define __ADDRESSABLE_xen_hypercall __ADDRESSABLE_ASM_STR(__SCK__xen_hypercall) +#define __ADDRESSABLE_xen_hypercall \ + __stringify(.global STATIC_CALL_KEY(xen_hypercall);) #endif #define __HYPERCALL \ __ADDRESSABLE_xen_hypercall \ - "call __SCT__xen_hypercall" + __stringify(call STATIC_CALL_TRAMP(xen_hypercall)) #define __HYPERCALL_ENTRY(x) "a" (x) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 6f04a1d8c720..64ff73c533e5 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -288,14 +288,6 @@ static inline void *offset_to_ptr(const int *off) #define __ADDRESSABLE(sym) \ ___ADDRESSABLE(sym, __section(".discard.addressable")) -#define __ADDRESSABLE_ASM(sym) \ - .pushsection .discard.addressable,"aw"; \ - .align ARCH_SEL(8,4); \ - ARCH_SEL(.quad, .long) __stringify(sym); \ - .popsection; - -#define __ADDRESSABLE_ASM_STR(sym) __stringify(__ADDRESSABLE_ASM(sym)) - /* * This returns a constant expression while determining if an argument is * a constant expression, most importantly without evaluating the argument. -- cgit v1.2.3 From 76d2e3890fb169168c73f2e4f8375c7cc24a765e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 16 Aug 2025 07:25:20 -0700 Subject: NFS: Fix a race when updating an existing write After nfs_lock_and_join_requests() tests for whether the request is still attached to the mapping, nothing prevents a call to nfs_inode_remove_request() from succeeding until we actually lock the page group. The reason is that whoever called nfs_inode_remove_request() doesn't necessarily have a lock on the page group head. So in order to avoid races, let's take the page group lock earlier in nfs_lock_and_join_requests(), and hold it across the removal of the request in nfs_inode_remove_request(). 
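Schematically, the new ordering looks like this (a simplified sketch using the names from the patch below; error handling and the retry loop are elided):

/* nfs_lock_and_join_requests(): group lock now precedes the recheck */
nfs_page_group_lock(head);
if (head != folio->private) {		/* removal raced with us: retry */
	nfs_page_group_unlock(head);
	nfs_unlock_and_release_request(head);
}
nfs_cancel_remove_inode(head, inode);	/* safe: group lock already held */

/* nfs_inode_remove_request(): removal happens under the same lock */
nfs_page_group_lock(req);
if (nfs_page_group_sync_on_bit_locked(req, PG_REMOVE))
	/* detach the folio from the mapping under i_private_lock */;
nfs_page_group_unlock(req);

Because both sides now serialize on the page group lock, a request can no longer be removed between the mapping check and the PG_REMOVE handling.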
Reported-by: Jeff Layton Tested-by: Joe Quanaim Tested-by: Andrew Steffen Reviewed-by: Jeff Layton Fixes: bd37d6fce184 ("NFSv4: Convert nfs_lock_and_join_requests() to use nfs_page_find_head_request()") Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 9 +++++---- fs/nfs/write.c | 29 ++++++++++------------------- include/linux/nfs_page.h | 1 + 3 files changed, 16 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 11968dcb7243..6e69ce43a13f 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -253,13 +253,14 @@ nfs_page_group_unlock(struct nfs_page *req) nfs_page_clear_headlock(req); } -/* - * nfs_page_group_sync_on_bit_locked +/** + * nfs_page_group_sync_on_bit_locked - Test if all requests have @bit set + * @req: request in page group + * @bit: PG_* bit that is used to sync page group * * must be called with page group lock held */ -static bool -nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit) +bool nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit) { struct nfs_page *head = req->wb_head; struct nfs_page *tmp; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index fa5c41d0989a..8b7c04737967 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -153,20 +153,10 @@ nfs_page_set_inode_ref(struct nfs_page *req, struct inode *inode) } } -static int -nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode) +static void nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode) { - int ret; - - if (!test_bit(PG_REMOVE, &req->wb_flags)) - return 0; - ret = nfs_page_group_lock(req); - if (ret) - return ret; if (test_and_clear_bit(PG_REMOVE, &req->wb_flags)) nfs_page_set_inode_ref(req, inode); - nfs_page_group_unlock(req); - return 0; } /** @@ -585,19 +575,18 @@ retry: } } + ret = nfs_page_group_lock(head); + if (ret < 0) + goto out_unlock; + /* Ensure that nobody removed the request before we locked it */ if (head != folio->private) { + nfs_page_group_unlock(head); nfs_unlock_and_release_request(head); goto retry; } - ret = nfs_cancel_remove_inode(head, inode); - if (ret < 0) - goto out_unlock; - - ret = nfs_page_group_lock(head); - if (ret < 0) - goto out_unlock; + nfs_cancel_remove_inode(head, inode); /* lock each request in the page group */ for (subreq = head->wb_this_page; @@ -786,7 +775,8 @@ static void nfs_inode_remove_request(struct nfs_page *req) { struct nfs_inode *nfsi = NFS_I(nfs_page_to_inode(req)); - if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) { + nfs_page_group_lock(req); + if (nfs_page_group_sync_on_bit_locked(req, PG_REMOVE)) { struct folio *folio = nfs_page_to_folio(req->wb_head); struct address_space *mapping = folio->mapping; @@ -798,6 +788,7 @@ static void nfs_inode_remove_request(struct nfs_page *req) } spin_unlock(&mapping->i_private_lock); } + nfs_page_group_unlock(req); if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) { atomic_long_dec(&nfsi->nrequests); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 169b4ae30ff4..9aed39abc94b 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -160,6 +160,7 @@ extern void nfs_join_page_group(struct nfs_page *head, extern int nfs_page_group_lock(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *); extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); +extern bool nfs_page_group_sync_on_bit_locked(struct nfs_page *, unsigned int); extern int nfs_page_set_headlock(struct nfs_page *req); 
extern void nfs_page_clear_headlock(struct nfs_page *req); extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *); -- cgit v1.2.3

From 808471ddb0fa785559c3e7aee59be20a13b46ef5 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Wed, 13 Aug 2025 15:04:55 +0900 Subject: iov_iter: iterate_folioq: fix handling of offset >= folio size

It's apparently possible to get an iov advanced all the way up to the end of the current page we're looking at, e.g.

(gdb) p *iter $24 = {iter_type = 4 '\004', nofault = false, data_source = false, iov_offset = 4096, {__ubuf_iovec = { iov_base = 0xffff88800f5bc000, iov_len = 655}, {{__iov = 0xffff88800f5bc000, kvec = 0xffff88800f5bc000, bvec = 0xffff88800f5bc000, folioq = 0xffff88800f5bc000, xarray = 0xffff88800f5bc000, ubuf = 0xffff88800f5bc000}, count = 655}}, {nr_segs = 2, folioq_slot = 2 '\002', xarray_start = 2}}

Here iov_offset is 4k with 4k-sized folios. This should have been fine because we're only in the 2nd slot and there's another one after this, but iterate_folioq() should not try to map a folio that skips the whole size, and more importantly 'part' here does not end up zero (because 'PAGE_SIZE - skip % PAGE_SIZE' ends up PAGE_SIZE, not zero), so skip forward to the "advance to next folio" code.

Link: https://lkml.kernel.org/r/20250813-iot_iter_folio-v3-0-a0ffad2b665a@codewreck.org Link: https://lkml.kernel.org/r/20250813-iot_iter_folio-v3-1-a0ffad2b665a@codewreck.org Signed-off-by: Dominique Martinet Fixes: db0aa2e9566f ("mm: Define struct folio_queue and ITER_FOLIOQ to handle a sequence of folios") Reported-by: Maximilian Bosch Reported-by: Ryan Lahfa Reported-by: Christian Theune Reported-by: Arnout Engelen Link: https://lkml.kernel.org/r/D4LHHUNLG79Y.12PI0X6BEHRHW@mbosch.me/ Acked-by: David Howells Cc: Al Viro Cc: Christian Brauner Cc: Matthew Wilcox (Oracle) Cc: [6.12+] Signed-off-by: Andrew Morton --- include/linux/iov_iter.h | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'include/linux')

diff --git a/include/linux/iov_iter.h b/include/linux/iov_iter.h index c4aa58032faf..f9a17fbbd398 100644 --- a/include/linux/iov_iter.h +++ b/include/linux/iov_iter.h @@ -160,7 +160,7 @@ size_t iterate_folioq(struct iov_iter *iter, size_t len, void *priv, void *priv2 do { struct folio *folio = folioq_folio(folioq, slot); - size_t part, remain, consumed; + size_t part, remain = 0, consumed; size_t fsize; void *base; @@ -168,14 +168,16 @@ size_t iterate_folioq(struct iov_iter *iter, size_t len, void *priv, void *priv2 break; fsize = folioq_folio_size(folioq, slot); - base = kmap_local_folio(folio, skip); - part = umin(len, PAGE_SIZE - skip % PAGE_SIZE); - remain = step(base, progress, part, priv, priv2); - kunmap_local(base); - consumed = part - remain; - len -= consumed; - progress += consumed; - skip += consumed; + if (skip < fsize) { + base = kmap_local_folio(folio, skip); + part = umin(len, PAGE_SIZE - skip % PAGE_SIZE); + remain = step(base, progress, part, priv, priv2); + kunmap_local(base); + consumed = part - remain; + len -= consumed; + progress += consumed; + skip += consumed; + } if (skip >= fsize) { skip = 0; slot++; -- cgit v1.2.3

From 053c8ebe74f7e1f4c072e59428da80b9d78bc4b7 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sun, 17 Aug 2025 23:17:59 +0800 Subject: mm/migrate: fix NULL movable_ops if CONFIG_ZSMALLOC=m

After commit 84caf98838a3e5f4bdb34 ("mm: stop storing migration_ops in page->mapping") we get the following warning if CONFIG_ZSMALLOC=m:

WARNING: CPU: 3 PID: 42 at
mm/migrate.c:142 isolate_movable_ops_page+0xa8/0x1c0 CPU: 3 UID: 0 PID: 42 Comm: kcompactd0 Not tainted 6.16.0-rc5+ #2133 PREEMPT pc 9000000000540bd8 ra 9000000000540b84 tp 9000000100420000 sp 9000000100423a60 a0 9000000100193a80 a1 000000000000000c a2 000000000000001b a3 ffffffffffffffff a4 ffffffffffffffff a5 0000000000000267 a6 0000000000000000 a7 9000000100423ae0 t0 00000000000000f1 t1 00000000000000f6 t2 0000000000000000 t3 0000000000000001 t4 ffffff00010eb834 t5 0000000000000040 t6 900000010c89d380 t7 90000000023fcc70 t8 0000000000000018 u0 0000000000000000 s9 ffffff00010eb800 s0 ffffff00010eb800 s1 000000000000000c s2 0000000000043ae0 s3 0000800000000000 s4 900000000219cc40 s5 0000000000000000 s6 ffffff00010eb800 s7 0000000000000001 s8 90000000025b4000 ra: 9000000000540b84 isolate_movable_ops_page+0x54/0x1c0 ERA: 9000000000540bd8 isolate_movable_ops_page+0xa8/0x1c0 CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) PRMD: 00000004 (PPLV0 +PIE -PWE) EUEN: 00000000 (-FPE -SXE -ASXE -BTE) ECFG: 00071c1d (LIE=0,2-4,10-12 VS=7) ESTAT: 000c0000 [BRK] (IS= ECode=12 EsubCode=0) PRID: 0014c010 (Loongson-64bit, Loongson-3A5000) CPU: 3 UID: 0 PID: 42 Comm: kcompactd0 Not tainted 6.16.0-rc5+ #2133 PREEMPT Stack : 90000000021fd000 0000000000000000 9000000000247720 9000000100420000 90000001004236a0 90000001004236a8 0000000000000000 90000001004237e8 90000001004237e0 90000001004237e0 9000000100423550 0000000000000001 0000000000000001 90000001004236a8 725a84864a19e2d9 90000000023fcc58 9000000100420000 90000000024c6848 9000000002416848 0000000000000001 0000000000000000 000000000000000a 0000000007fe0000 ffffff00010eb800 0000000000000000 90000000021fd000 0000000000000000 900000000205cf30 000000000000008e 0000000000000009 ffffff00010eb800 0000000000000001 90000000025b4000 0000000000000000 900000000024773c 00007ffff103d748 00000000000000b0 0000000000000004 0000000000000000 0000000000071c1d ... Call Trace: [<900000000024773c>] show_stack+0x5c/0x190 [<90000000002415e0>] dump_stack_lvl+0x70/0x9c [<90000000004abe6c>] isolate_migratepages_block+0x3bc/0x16e0 [<90000000004af408>] compact_zone+0x558/0x1000 [<90000000004b0068>] compact_node+0xa8/0x1e0 [<90000000004b0aa4>] kcompactd+0x394/0x410 [<90000000002b3c98>] kthread+0x128/0x140 [<9000000001779148>] ret_from_kernel_thread+0x28/0xc0 [<9000000000245528>] ret_from_kernel_thread_asm+0x10/0x88 The reason is that defined(CONFIG_ZSMALLOC) evaluates to 1 only when CONFIG_ZSMALLOC=y, we should use IS_ENABLED(CONFIG_ZSMALLOC) instead. But when I use IS_ENABLED(CONFIG_ZSMALLOC), page_movable_ops() cannot access zsmalloc_mops because zsmalloc_mops is in a module. To solve this problem, we define a set_movable_ops() interface to register and unregister offline_movable_ops / zsmalloc_movable_ops in mm/migrate.c, and call them at mm/balloon_compaction.c & mm/zsmalloc.c. Since offline_movable_ops / zsmalloc_movable_ops are always accessible, all #ifdef / #endif are removed in page_movable_ops(). Link: https://lkml.kernel.org/r/20250817151759.2525174-1-chenhuacai@loongson.cn Fixes: 84caf98838a3 ("mm: stop storing migration_ops in page->mapping") Signed-off-by: Huacai Chen Acked-by: Zi Yan Acked-by: David Hildenbrand Cc: Huacai Chen Cc: Huacai Chen Cc: Lorenzo Stoakes Cc: "Michael S. 
Tsirkin" Cc: Minchan Kim Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton --- include/linux/migrate.h | 5 +++++ mm/balloon_compaction.c | 6 ++++++ mm/migrate.c | 38 ++++++++++++++++++++++++++++++-------- mm/zsmalloc.c | 10 ++++++++++ 4 files changed, 51 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index acadd41e0b5c..9009e27b5f44 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -79,6 +79,7 @@ void migration_entry_wait_on_locked(swp_entry_t entry, spinlock_t *ptl) void folio_migrate_flags(struct folio *newfolio, struct folio *folio); int folio_migrate_mapping(struct address_space *mapping, struct folio *newfolio, struct folio *folio, int extra_count); +int set_movable_ops(const struct movable_operations *ops, enum pagetype type); #else @@ -100,6 +101,10 @@ static inline int migrate_huge_page_move_mapping(struct address_space *mapping, { return -ENOSYS; } +static inline int set_movable_ops(const struct movable_operations *ops, enum pagetype type) +{ + return -ENOSYS; +} #endif /* CONFIG_MIGRATION */ diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c index 2a4a649805c1..03c5dbabb156 100644 --- a/mm/balloon_compaction.c +++ b/mm/balloon_compaction.c @@ -254,4 +254,10 @@ const struct movable_operations balloon_mops = { .putback_page = balloon_page_putback, }; +static int __init balloon_init(void) +{ + return set_movable_ops(&balloon_mops, PGTY_offline); +} +core_initcall(balloon_init); + #endif /* CONFIG_BALLOON_COMPACTION */ diff --git a/mm/migrate.c b/mm/migrate.c index 425401b2d4e1..9e5ef39ce73a 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -43,8 +43,6 @@ #include #include #include -#include -#include #include @@ -53,6 +51,33 @@ #include "internal.h" #include "swap.h" +static const struct movable_operations *offline_movable_ops; +static const struct movable_operations *zsmalloc_movable_ops; + +int set_movable_ops(const struct movable_operations *ops, enum pagetype type) +{ + /* + * We only allow for selected types and don't handle concurrent + * registration attempts yet. + */ + switch (type) { + case PGTY_offline: + if (offline_movable_ops && ops) + return -EBUSY; + offline_movable_ops = ops; + break; + case PGTY_zsmalloc: + if (zsmalloc_movable_ops && ops) + return -EBUSY; + zsmalloc_movable_ops = ops; + break; + default: + return -EINVAL; + } + return 0; +} +EXPORT_SYMBOL_GPL(set_movable_ops); + static const struct movable_operations *page_movable_ops(struct page *page) { VM_WARN_ON_ONCE_PAGE(!page_has_movable_ops(page), page); @@ -62,15 +87,12 @@ static const struct movable_operations *page_movable_ops(struct page *page) * it as movable, the page type must be sticky until the page gets freed * back to the buddy. */ -#ifdef CONFIG_BALLOON_COMPACTION if (PageOffline(page)) /* Only balloon compaction sets PageOffline pages movable. 
*/ - return &balloon_mops; -#endif /* CONFIG_BALLOON_COMPACTION */ -#if defined(CONFIG_ZSMALLOC) && defined(CONFIG_COMPACTION) + return offline_movable_ops; if (PageZsmalloc(page)) - return &zsmalloc_mops; -#endif /* defined(CONFIG_ZSMALLOC) && defined(CONFIG_COMPACTION) */ + return zsmalloc_movable_ops; + return NULL; }

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 2c5e56a65354..805a10b41266 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -2246,8 +2246,15 @@ EXPORT_SYMBOL_GPL(zs_destroy_pool); static int __init zs_init(void) { + int rc __maybe_unused; + #ifdef CONFIG_ZPOOL zpool_register_driver(&zs_zpool_driver); +#endif +#ifdef CONFIG_COMPACTION + rc = set_movable_ops(&zsmalloc_mops, PGTY_zsmalloc); + if (rc) + return rc; #endif zs_stat_init(); return 0; @@ -2257,6 +2264,9 @@ static void __exit zs_exit(void) { #ifdef CONFIG_ZPOOL zpool_unregister_driver(&zs_zpool_driver); +#endif +#ifdef CONFIG_COMPACTION + set_movable_ops(NULL, PGTY_zsmalloc); #endif zs_stat_exit(); } -- cgit v1.2.3

From 370ac285f23aecae40600851fb4a1a9e75e50973 Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Thu, 14 Aug 2025 13:54:59 +0530 Subject: block: avoid cpu_hotplug_lock dependency on freeze_lock

A recent lockdep[1] splat observed while running blktest block/005 reveals a potential deadlock caused by the cpu_hotplug_lock dependency on ->freeze_lock. This dependency was introduced by commit 033b667a823e ("block: blk-rq-qos: guard rq-qos helpers by static key").

That change added a static key to avoid fetching q->rq_qos when neither blk-wbt nor blk-iolatency is configured. The static key dynamically patches kernel text to a NOP when disabled, eliminating overhead of fetching q->rq_qos in the I/O hot path. However, enabling a static key at runtime requires acquiring both cpu_hotplug_lock and jump_label_mutex. When this happens after the queue has already been frozen (i.e., while holding ->freeze_lock), it creates a locking dependency from cpu_hotplug_lock to ->freeze_lock, which leads to a potential deadlock reported by lockdep [1].

To resolve this, replace the static key mechanism with q->queue_flags: QUEUE_FLAG_QOS_ENABLED. This flag is evaluated in the fast path before accessing q->rq_qos. If the flag is set, we proceed to fetch q->rq_qos; otherwise, the access is skipped.

Since q->queue_flags is commonly accessed in the I/O hot path and resides in the first cacheline of struct request_queue, checking it imposes minimal overhead while eliminating the deadlock risk.

This change avoids the lockdep splat without introducing performance regressions.
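Sketched out, the problem and the fix look roughly like this (reconstructed from the description above, not the literal lockdep report):

/*
 * rq_qos_add()
 *   blk_mq_freeze_queue(q);    // takes q->freeze_lock
 *   static_branch_inc(...);    // takes cpu_hotplug_lock + jump_label_mutex
 *                              // => couples cpu_hotplug_lock and
 *                              //    ->freeze_lock under lockdep
 *
 * The fix replaces the static-key flip with a plain atomic bit op that
 * takes no locks at all:
 */
blk_queue_flag_set(QUEUE_FLAG_QOS_ENABLED, q);

and the fast-path gate becomes test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags), as shown in the hunks below.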
[1] https://lore.kernel.org/linux-block/4fdm37so3o4xricdgfosgmohn63aa7wj3ua4e5vpihoamwg3ui@fq42f5q5t5ic/ Reported-by: Shinichiro Kawasaki Closes: https://lore.kernel.org/linux-block/4fdm37so3o4xricdgfosgmohn63aa7wj3ua4e5vpihoamwg3ui@fq42f5q5t5ic/ Fixes: 033b667a823e ("block: blk-rq-qos: guard rq-qos helpers by static key") Tested-by: Shin'ichiro Kawasaki Signed-off-by: Nilay Shroff Reviewed-by: Ming Lei Reviewed-by: Yu Kuai Link: https://lore.kernel.org/r/20250814082612.500845-4-nilay@linux.ibm.com Signed-off-by: Jens Axboe --- block/blk-mq-debugfs.c | 1 + block/blk-rq-qos.c | 9 ++++----- block/blk-rq-qos.h | 54 +++++++++++++++++++++++++++++--------------------- include/linux/blkdev.h | 1 + 4 files changed, 37 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 7ed3e71f2fc0..32c65efdda46 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -95,6 +95,7 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(SQ_SCHED), QUEUE_FLAG_NAME(DISABLE_WBT_DEF), QUEUE_FLAG_NAME(NO_ELV_SWITCH), + QUEUE_FLAG_NAME(QOS_ENABLED), }; #undef QUEUE_FLAG_NAME diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c index b1e24bb85ad2..654478dfbc20 100644 --- a/block/blk-rq-qos.c +++ b/block/blk-rq-qos.c @@ -2,8 +2,6 @@ #include "blk-rq-qos.h" -__read_mostly DEFINE_STATIC_KEY_FALSE(block_rq_qos); - /* * Increment 'v', if 'v' is below 'below'. Returns true if we succeeded, * false if 'v' + 1 would be bigger than 'below'. @@ -319,8 +317,8 @@ void rq_qos_exit(struct request_queue *q) struct rq_qos *rqos = q->rq_qos; q->rq_qos = rqos->next; rqos->ops->exit(rqos); - static_branch_dec(&block_rq_qos); } + blk_queue_flag_clear(QUEUE_FLAG_QOS_ENABLED, q); mutex_unlock(&q->rq_qos_mutex); } @@ -346,7 +344,7 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id, goto ebusy; rqos->next = q->rq_qos; q->rq_qos = rqos; - static_branch_inc(&block_rq_qos); + blk_queue_flag_set(QUEUE_FLAG_QOS_ENABLED, q); blk_mq_unfreeze_queue(q, memflags); @@ -374,10 +372,11 @@ void rq_qos_del(struct rq_qos *rqos) for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) { if (*cur == rqos) { *cur = rqos->next; - static_branch_dec(&block_rq_qos); break; } } + if (!q->rq_qos) + blk_queue_flag_clear(QUEUE_FLAG_QOS_ENABLED, q); blk_mq_unfreeze_queue(q, memflags); mutex_lock(&q->debugfs_mutex); diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h index 28125fc49eff..1fe22000a379 100644 --- a/block/blk-rq-qos.h +++ b/block/blk-rq-qos.h @@ -12,7 +12,6 @@ #include "blk-mq-debugfs.h" struct blk_mq_debugfs_attr; -extern struct static_key_false block_rq_qos; enum rq_qos_id { RQ_QOS_WBT, @@ -113,49 +112,55 @@ void __rq_qos_queue_depth_changed(struct rq_qos *rqos); static inline void rq_qos_cleanup(struct request_queue *q, struct bio *bio) { - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && + q->rq_qos) __rq_qos_cleanup(q->rq_qos, bio); } static inline void rq_qos_done(struct request_queue *q, struct request *rq) { - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos && - !blk_rq_is_passthrough(rq)) + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && + q->rq_qos && !blk_rq_is_passthrough(rq)) __rq_qos_done(q->rq_qos, rq); } static inline void rq_qos_issue(struct request_queue *q, struct request *rq) { - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && + q->rq_qos) 
__rq_qos_issue(q->rq_qos, rq); } static inline void rq_qos_requeue(struct request_queue *q, struct request *rq) { - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && + q->rq_qos) __rq_qos_requeue(q->rq_qos, rq); } static inline void rq_qos_done_bio(struct bio *bio) { - if (static_branch_unlikely(&block_rq_qos) && - bio->bi_bdev && (bio_flagged(bio, BIO_QOS_THROTTLED) || - bio_flagged(bio, BIO_QOS_MERGED))) { - struct request_queue *q = bdev_get_queue(bio->bi_bdev); - - /* - * If a bio has BIO_QOS_xxx set, it implicitly implies that - * q->rq_qos is present. So, we skip re-checking q->rq_qos - * here as an extra optimization and directly call - * __rq_qos_done_bio(). - */ - __rq_qos_done_bio(q->rq_qos, bio); - } + struct request_queue *q; + + if (!bio->bi_bdev || (!bio_flagged(bio, BIO_QOS_THROTTLED) && + !bio_flagged(bio, BIO_QOS_MERGED))) + return; + + q = bdev_get_queue(bio->bi_bdev); + + /* + * If a bio has BIO_QOS_xxx set, it implicitly implies that + * q->rq_qos is present. So, we skip re-checking q->rq_qos + * here as an extra optimization and directly call + * __rq_qos_done_bio(). + */ + __rq_qos_done_bio(q->rq_qos, bio); } static inline void rq_qos_throttle(struct request_queue *q, struct bio *bio) { - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) { + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && + q->rq_qos) { bio_set_flag(bio, BIO_QOS_THROTTLED); __rq_qos_throttle(q->rq_qos, bio); } @@ -164,14 +169,16 @@ static inline void rq_qos_throttle(struct request_queue *q, struct bio *bio) static inline void rq_qos_track(struct request_queue *q, struct request *rq, struct bio *bio) { - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && + q->rq_qos) __rq_qos_track(q->rq_qos, rq, bio); } static inline void rq_qos_merge(struct request_queue *q, struct request *rq, struct bio *bio) { - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) { + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && + q->rq_qos) { bio_set_flag(bio, BIO_QOS_MERGED); __rq_qos_merge(q->rq_qos, rq, bio); } @@ -179,7 +186,8 @@ static inline void rq_qos_merge(struct request_queue *q, struct request *rq, static inline void rq_qos_queue_depth_changed(struct request_queue *q) { - if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) + if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) && + q->rq_qos) __rq_qos_queue_depth_changed(q->rq_qos); } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 95886b404b16..fe1797bbec42 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -656,6 +656,7 @@ enum { QUEUE_FLAG_SQ_SCHED, /* single queue style io dispatch */ QUEUE_FLAG_DISABLE_WBT_DEF, /* for sched to disable/enable wbt */ QUEUE_FLAG_NO_ELV_SWITCH, /* can't switch elevator any more */ + QUEUE_FLAG_QOS_ENABLED, /* qos is enabled */ QUEUE_FLAG_MAX }; -- cgit v1.2.3 From b08a784a5d1495c42ff9b0c70887d49211cddfe0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 18 Aug 2025 19:03:54 +0100 Subject: net: Introduce skb_copy_datagram_from_iter_full() In a similar manner to copy_from_iter()/copy_from_iter_full(), introduce skb_copy_datagram_from_iter_full() which reverts the iterator to its initial state when returning an error. A subsequent fix for a vsock regression will make use of this new function. Cc: Christian Brauner Cc: Alexander Viro Signed-off-by: Will Deacon Acked-by: Michael S. 
Tsirkin Reviewed-by: Stefan Hajnoczi Link: https://patch.msgid.link/20250818180355.29275-2-will@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 2 ++ net/core/datagram.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+) (limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 14b923ddb6df..fa633657e4c0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4172,6 +4172,8 @@ int skb_copy_and_crc32c_datagram_iter(const struct sk_buff *skb, int offset, struct iov_iter *to, int len, u32 *crcp); int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, struct iov_iter *from, int len); +int skb_copy_datagram_from_iter_full(struct sk_buff *skb, int offset, + struct iov_iter *from, int len); int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm); void skb_free_datagram(struct sock *sk, struct sk_buff *skb); int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags);

diff --git a/net/core/datagram.c b/net/core/datagram.c index 94cc4705e91d..f474b9b120f9 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -618,6 +618,20 @@ fault: } EXPORT_SYMBOL(skb_copy_datagram_from_iter); +int skb_copy_datagram_from_iter_full(struct sk_buff *skb, int offset, + struct iov_iter *from, int len) +{ + struct iov_iter_state state; + int ret; + + iov_iter_save_state(from, &state); + ret = skb_copy_datagram_from_iter(skb, offset, from, len); + if (ret) + iov_iter_restore(from, &state); + return ret; +} +EXPORT_SYMBOL(skb_copy_datagram_from_iter_full); + int zerocopy_fill_skb_from_iter(struct sk_buff *skb, struct iov_iter *from, size_t length) { -- cgit v1.2.3

From ec79003c5f9d2c7f9576fc69b8dbda80305cbe3a Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Thu, 21 Aug 2025 02:18:24 +0000 Subject: atm: atmtcp: Prevent arbitrary write in atmtcp_recv_control().

syzbot reported the splat below. [0]

When atmtcp_v_open() or atmtcp_v_close() is called via connect() or close(), atmtcp_send_control() is called to send an in-kernel special message. The message has ATMTCP_HDR_MAGIC in atmtcp_control.hdr.length. Also, a pointer to struct atm_vcc is stored in atmtcp_control.vcc. The notable thing is that struct atmtcp_control is uAPI but has a space for an in-kernel pointer.

struct atmtcp_control { struct atmtcp_hdr hdr; /* must be first */ ... atm_kptr_t vcc; /* both directions */ ... } __ATM_API_ALIGN;

typedef struct { unsigned char _[8]; } __ATM_API_ALIGN atm_kptr_t;

The special message is processed in atmtcp_recv_control() called from atmtcp_c_send(). atmtcp_c_send() is vcc->dev->ops->send() and is called from 2 paths:

1. .ndo_start_xmit() (vcc->send() == atm_send_aal0()) 2. vcc_sendmsg()

The problem is that sendmsg() does not validate the message length, so userspace can abuse atmtcp_recv_control() to overwrite an arbitrary kernel pointer (kptr) via atmtcp_control.

Let's add a new ->pre_send() hook to validate messages from sendmsg().
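The resulting call paths, as a simplified sketch (names as in the patch below):

/*
 * 1. in-kernel TX:  .ndo_start_xmit() -> atm_send_aal0() -> ops->send()
 *    - header was built by atmtcp_send_control(), so the magic length
 *      is legitimate here
 *
 * 2. userspace TX:  sendmsg() -> vcc_sendmsg() -> ops->pre_send()
 *                                              -> ops->send()
 *    - atmtcp_c_pre_send() rejects skbs shorter than
 *      sizeof(struct atmtcp_hdr) and any hdr->length equal to
 *      ATMTCP_HDR_MAGIC before the skb reaches atmtcp_c_send()
 */

Only the sendmsg() path carries attacker-controlled headers, so validating in the new hook is sufficient.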
[0]: Oops: general protection fault, probably for non-canonical address 0xdffffc00200000ab: 0000 [#1] SMP KASAN PTI KASAN: probably user-memory-access in range [0x0000000100000558-0x000000010000055f] CPU: 0 UID: 0 PID: 5865 Comm: syz-executor331 Not tainted 6.17.0-rc1-syzkaller-00215-gbab3ce404553 #0 PREEMPT(full) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/12/2025 RIP: 0010:atmtcp_recv_control drivers/atm/atmtcp.c:93 [inline] RIP: 0010:atmtcp_c_send+0x1da/0x950 drivers/atm/atmtcp.c:297 Code: 4d 8d 75 1a 4c 89 f0 48 c1 e8 03 42 0f b6 04 20 84 c0 0f 85 15 06 00 00 41 0f b7 1e 4d 8d b7 60 05 00 00 4c 89 f0 48 c1 e8 03 <42> 0f b6 04 20 84 c0 0f 85 13 06 00 00 66 41 89 1e 4d 8d 75 1c 4c RSP: 0018:ffffc90003f5f810 EFLAGS: 00010203 RAX: 00000000200000ab RBX: 0000000000000000 RCX: 0000000000000000 RDX: ffff88802a510000 RSI: 00000000ffffffff RDI: ffff888030a6068c RBP: ffff88802699fb40 R08: ffff888030a606eb R09: 1ffff1100614c0dd R10: dffffc0000000000 R11: ffffffff8718fc40 R12: dffffc0000000000 R13: ffff888030a60680 R14: 000000010000055f R15: 00000000ffffffff FS: 00007f8d7e9236c0(0000) GS:ffff888125c1c000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000045ad50 CR3: 0000000075bde000 CR4: 00000000003526f0 Call Trace: vcc_sendmsg+0xa10/0xc60 net/atm/common.c:645 sock_sendmsg_nosec net/socket.c:714 [inline] __sock_sendmsg+0x219/0x270 net/socket.c:729 ____sys_sendmsg+0x505/0x830 net/socket.c:2614 ___sys_sendmsg+0x21f/0x2a0 net/socket.c:2668 __sys_sendmsg net/socket.c:2700 [inline] __do_sys_sendmsg net/socket.c:2705 [inline] __se_sys_sendmsg net/socket.c:2703 [inline] __x64_sys_sendmsg+0x19b/0x260 net/socket.c:2703 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] do_syscall_64+0xfa/0x3b0 arch/x86/entry/syscall_64.c:94 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f8d7e96a4a9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 51 18 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f8d7e923198 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007f8d7e9f4308 RCX: 00007f8d7e96a4a9 RDX: 0000000000000000 RSI: 0000200000000240 RDI: 0000000000000005 RBP: 00007f8d7e9f4300 R08: 65732f636f72702f R09: 65732f636f72702f R10: 65732f636f72702f R11: 0000000000000246 R12: 00007f8d7e9c10ac R13: 00007f8d7e9231a0 R14: 0000200000000200 R15: 0000200000000250 Modules linked in: Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+1741b56d54536f4ec349@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/68a6767c.050a0220.3d78fd.0011.GAE@google.com/ Tested-by: syzbot+1741b56d54536f4ec349@syzkaller.appspotmail.com Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250821021901.2814721-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- drivers/atm/atmtcp.c | 17 ++++++++++++++--- include/linux/atmdev.h | 1 + net/atm/common.c | 15 ++++++++++++--- 3 files changed, 27 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c index eeae160c898d..fa3c76a2b49d 100644 --- a/drivers/atm/atmtcp.c +++ b/drivers/atm/atmtcp.c @@ -279,6 +279,19 @@ static struct atm_vcc *find_vcc(struct atm_dev *dev, short vpi, int vci) return NULL; } +static int atmtcp_c_pre_send(struct atm_vcc *vcc, struct sk_buff *skb) +{ + struct atmtcp_hdr *hdr; + + if (skb->len < sizeof(struct atmtcp_hdr)) + return -EINVAL; + + hdr = (struct atmtcp_hdr 
*)skb->data; + if (hdr->length == ATMTCP_HDR_MAGIC) + return -EINVAL; + + return 0; +} static int atmtcp_c_send(struct atm_vcc *vcc,struct sk_buff *skb) { @@ -288,9 +301,6 @@ static int atmtcp_c_send(struct atm_vcc *vcc,struct sk_buff *skb) struct sk_buff *new_skb; int result = 0; - if (skb->len < sizeof(struct atmtcp_hdr)) - goto done; - dev = vcc->dev_data; hdr = (struct atmtcp_hdr *) skb->data; if (hdr->length == ATMTCP_HDR_MAGIC) { @@ -347,6 +357,7 @@ static const struct atmdev_ops atmtcp_v_dev_ops = { static const struct atmdev_ops atmtcp_c_dev_ops = { .close = atmtcp_c_close, + .pre_send = atmtcp_c_pre_send, .send = atmtcp_c_send }; diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index 45f2f278b50a..70807c679f1a 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -185,6 +185,7 @@ struct atmdev_ops { /* only send is required */ int (*compat_ioctl)(struct atm_dev *dev,unsigned int cmd, void __user *arg); #endif + int (*pre_send)(struct atm_vcc *vcc, struct sk_buff *skb); int (*send)(struct atm_vcc *vcc,struct sk_buff *skb); int (*send_bh)(struct atm_vcc *vcc, struct sk_buff *skb); int (*send_oam)(struct atm_vcc *vcc,void *cell,int flags); diff --git a/net/atm/common.c b/net/atm/common.c index d7f7976ea13a..881c7f259dbd 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -635,18 +635,27 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size) skb->dev = NULL; /* for paths shared with net_device interfaces */ if (!copy_from_iter_full(skb_put(skb, size), size, &m->msg_iter)) { - atm_return_tx(vcc, skb); - kfree_skb(skb); error = -EFAULT; - goto out; + goto free_skb; } if (eff != size) memset(skb->data + size, 0, eff-size); + + if (vcc->dev->ops->pre_send) { + error = vcc->dev->ops->pre_send(vcc, skb); + if (error) + goto free_skb; + } + error = vcc->dev->ops->send(vcc, skb); error = error ? error : size; out: release_sock(sk); return error; +free_skb: + atm_return_tx(vcc, skb); + kfree_skb(skb); + goto out; } __poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait) -- cgit v1.2.3 From ced17ee32a9988b8a260628e7c31a100d7dc082e Mon Sep 17 00:00:00 2001 From: Igor Torrente Date: Thu, 7 Aug 2025 09:41:45 -0300 Subject: Revert "virtio: reject shm region if length is zero" The commit 206cc44588f7 ("virtio: reject shm region if length is zero") breaks the Virtio-gpu `host_visible` feature. As you can see in the snippet below, host_visible_region is zero because of the `kzalloc`. It's using the `vm_get_shm_region` (drivers/virtio/virtio_mmio.c:536) to read the `addr` and `len` from qemu/crosvm. ``` drivers/gpu/drm/virtio/virtgpu_kms.c 132 vgdev = drmm_kzalloc(dev, sizeof(struct virtio_gpu_device), GFP_KERNEL); [...] 177 if (virtio_get_shm_region(vgdev->vdev, &vgdev->host_visible_region, 178 VIRTIO_GPU_SHM_ID_HOST_VISIBLE)) { ``` Now it always fails. To fix, revert the offending commit. Fixes: 206cc44588f7 ("virtio: reject shm region if length is zero") Signed-off-by: Igor Torrente Message-Id: <20250807124145.81816-1-igor.torrente@collabora.com> Signed-off-by: Michael S. 
Tsirkin --- include/linux/virtio_config.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 918cf25cd3c6..8bf156dde554 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -328,8 +328,6 @@ static inline bool virtio_get_shm_region(struct virtio_device *vdev, struct virtio_shm_region *region, u8 id) { - if (!region->len) - return false; if (!vdev->config->get_shm_region) return false; return vdev->config->get_shm_region(vdev, region, id); -- cgit v1.2.3 From b3dcc9d1d806fb1e175f85978713eef868531da4 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Tue, 26 Aug 2025 10:19:46 +0300 Subject: memblock: fix kernel-doc for MEMBLOCK_RSRV_NOINIT The kernel-doc description of MEMBLOCK_RSRV_NOINIT and memblock_reserved_mark_noinit() do not accurately describe their functionality. Expand their kernel doc to make it clear that the user of MEMBLOCK_RSRV_NOINIT is responsible to properly initialize the struct pages for such regions and add more details about effects of using this flag. Reviewed-by: David Hildenbrand Link: https://lore.kernel.org/r/f8140a17-c4ec-489b-b314-d45abe48bf36@redhat.com Link: https://lore.kernel.org/r/20250826071947.1949725-1-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) --- include/linux/memblock.h | 5 +++-- mm/memblock.c | 15 +++++++++++---- 2 files changed, 14 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index b96746376e17..fcda8481de9a 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -40,8 +40,9 @@ extern unsigned long long max_possible_pfn; * via a driver, and never indicated in the firmware-provided memory map as * system RAM. This corresponds to IORESOURCE_SYSRAM_DRIVER_MANAGED in the * kernel resource tree. - * @MEMBLOCK_RSRV_NOINIT: memory region for which struct pages are - * not initialized (only for reserved regions). + * @MEMBLOCK_RSRV_NOINIT: reserved memory region for which struct pages are not + * fully initialized. Users of this flag are responsible to properly initialize + * struct pages of this region * @MEMBLOCK_RSRV_KERN: memory region that is reserved for kernel use, * either explictitly with memblock_reserve_kern() or via memblock * allocation APIs. All memblock allocations set this flag. diff --git a/mm/memblock.c b/mm/memblock.c index 8a0ed3074af4..117d963e677c 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1091,13 +1091,20 @@ int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size) /** * memblock_reserved_mark_noinit - Mark a reserved memory region with flag - * MEMBLOCK_RSRV_NOINIT which results in the struct pages not being initialized - * for this region. + * MEMBLOCK_RSRV_NOINIT + * * @base: the base phys addr of the region * @size: the size of the region * - * struct pages will not be initialized for reserved memory regions marked with - * %MEMBLOCK_RSRV_NOINIT. + * The struct pages for the reserved regions marked %MEMBLOCK_RSRV_NOINIT will + * not be fully initialized to allow the caller optimize their initialization. + * + * When %CONFIG_DEFERRED_STRUCT_PAGE_INIT is enabled, setting this flag + * completely bypasses the initialization of struct pages for such region. + * + * When %CONFIG_DEFERRED_STRUCT_PAGE_INIT is disabled, struct pages in this + * region will be initialized with default values but won't be marked as + * reserved. 
* * Return: 0 on success, -errno on failure. */ -- cgit v1.2.3
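To illustrate the contract the clarified kernel-doc describes, here is a usage sketch modeled loosely on the HugeTLB bootmem pattern (the size, alignment, and node are placeholders, and the later struct page setup is only indicated by a comment):

#include <linux/memblock.h>

static void __init reserve_noinit_example(phys_addr_t size, int nid)
{
	void *buf;

	/* raw allocation: the memory contents are not zeroed either */
	buf = memblock_alloc_try_nid_raw(size, PAGE_SIZE, 0,
					 MEMBLOCK_ALLOC_ACCESSIBLE, nid);
	if (!buf)
		return;

	/* opt out of (full) generic struct page initialization */
	memblock_reserved_mark_noinit(virt_to_phys(buf), size);

	/*
	 * The caller now owns initializing the struct pages backing
	 * [buf, buf + size) before anything dereferences them.
	 */
}

Note that, per the kernel-doc above, the flag's effect depends on CONFIG_DEFERRED_STRUCT_PAGE_INIT, so the owner must cope with both the "init skipped entirely" and the "default-initialized but not marked reserved" cases.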