From 48fcc895403cc97aa6c776cb65e6aa11290c0b44 Mon Sep 17 00:00:00 2001 From: Wei Liu Date: Thu, 23 Apr 2026 17:26:26 +0000 Subject: mshv: add a missing padding field MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That was missed when importing the header. Reported-by: Doru Blânzeanu Reported-by: Magnus Kulke Fixes: e68bda71a2384 ("hyperv: Add new Hyper-V headers in include/hyperv") Cc: stable@kernel.org Reviewed-by: Easwar Hariharan Signed-off-by: Wei Liu --- include/hyperv/hvhdk.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/hyperv/hvhdk.h b/include/hyperv/hvhdk.h index 5e83d3714966..0c89c62c9706 100644 --- a/include/hyperv/hvhdk.h +++ b/include/hyperv/hvhdk.h @@ -79,6 +79,7 @@ struct hv_vp_register_page { u64 registers[18]; }; + u8 reserved[8]; /* Volatile XMM registers (HV_X64_REGISTER_CLASS_XMM) */ union { struct { -- cgit v1.2.3 From c5c3ef8d49e15d2fc1cec4ad7c91d81b99977440 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Tue, 17 Feb 2026 10:23:34 -0800 Subject: Drivers: hv: vmbus: Provide option to skip VMBus unload on panic Currently, VMBus code initiates a VMBus unload in the panic path so that if a kdump kernel is loaded, it can start fresh in setting up its own VMBus connection. However, a driver for the VMBus virtual frame buffer may need to flush dirty portions of the frame buffer back to the Hyper-V host so that panic information is visible in the graphics console. To support such flushing, provide exported functions for the frame buffer driver to specify that the VMBus unload should not be done by the VMBus driver, and to initiate the VMBus unload itself. Together these allow a frame buffer driver to delay the VMBus unload until after it has completed the flush. Ideally, the VMBus driver could use its own panic-path callback to do the unload after all frame buffer drivers have finished. But DRM frame buffer drivers use the kmsg dump callback, and there are no callbacks after that in the panic path. Hence this somewhat messy approach to properly sequencing the frame buffer flush and the VMBus unload. Fixes: 3671f3777758 ("drm/hyperv: Add support for drm_panic") Signed-off-by: Michael Kelley Reviewed-by: Long Li Signed-off-by: Wei Liu --- drivers/hv/channel_mgmt.c | 1 + drivers/hv/hyperv_vmbus.h | 1 - drivers/hv/vmbus_drv.c | 25 ++++++++++++++++++------- include/linux/hyperv.h | 3 +++ 4 files changed, 22 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 84eb0a6a0b54..89d214dda360 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -952,6 +952,7 @@ void vmbus_initiate_unload(bool crash) else vmbus_wait_for_unload(); } +EXPORT_SYMBOL_GPL(vmbus_initiate_unload); static void vmbus_setup_channel_state(struct vmbus_channel *channel, struct vmbus_channel_offer_channel *offer) diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 05a36854389a..eb8bdd8bb1f5 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -441,7 +441,6 @@ void hv_vss_deinit(void); int hv_vss_pre_suspend(void); int hv_vss_pre_resume(void); void hv_vss_onchannelcallback(void *context); -void vmbus_initiate_unload(bool crash); static inline void hv_poll_channel(struct vmbus_channel *channel, void (*cb)(void *)) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index d28ff45d4cfd..c9eeb2ec365d 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -69,19 +69,29 @@ bool vmbus_is_confidential(void) } EXPORT_SYMBOL_GPL(vmbus_is_confidential); +static bool skip_vmbus_unload; + +/* + * Allow a VMBus framebuffer driver to specify that in the case of a panic, + * it will do the VMbus unload operation once it has flushed any dirty + * portions of the framebuffer to the Hyper-V host. + */ +void vmbus_set_skip_unload(bool skip) +{ + skip_vmbus_unload = skip; +} +EXPORT_SYMBOL_GPL(vmbus_set_skip_unload); + /* * The panic notifier below is responsible solely for unloading the * vmbus connection, which is necessary in a panic event. - * - * Notice an intrincate relation of this notifier with Hyper-V - * framebuffer panic notifier exists - we need vmbus connection alive - * there in order to succeed, so we need to order both with each other - * [see hvfb_on_panic()] - this is done using notifiers' priorities. */ static int hv_panic_vmbus_unload(struct notifier_block *nb, unsigned long val, void *args) { - vmbus_initiate_unload(true); + if (!skip_vmbus_unload) + vmbus_initiate_unload(true); + return NOTIFY_DONE; } static struct notifier_block hyperv_panic_vmbus_unload_block = { @@ -2897,7 +2907,8 @@ static void hv_crash_handler(struct pt_regs *regs) { int cpu; - vmbus_initiate_unload(true); + if (!skip_vmbus_unload) + vmbus_initiate_unload(true); /* * In crash handler we can't schedule synic cleanup for all CPUs, * doing the cleanup for current CPU only. This should be sufficient diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 964f1be8150c..41a3d82f0722 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1336,6 +1336,9 @@ int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, bool fb_overlap_ok); void vmbus_free_mmio(resource_size_t start, resource_size_t size); +void vmbus_initiate_unload(bool crash); +void vmbus_set_skip_unload(bool skip); + /* * GUID definitions of various offer types - services offered to the guest. */ -- cgit v1.2.3 From c15d7a2a11ea055bcecc0b538ae8ba79475637f9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 4 Dec 2025 11:17:23 +0100 Subject: tee: fix tee_ioctl_object_invoke_arg padding The tee_ioctl_object_invoke_arg structure has padding on some architectures but not on x86-32 and a few others: include/linux/tee.h:474:32: error: padding struct to align 'params' [-Werror=padded] I expect that all current users of this are on architectures that do have implicit padding here (arm64, arm, x86, riscv), so make the padding explicit in order to avoid surprises if this later gets used elsewhere. Fixes: d5b8b0fa1775 ("tee: add TEE_IOCTL_PARAM_ATTR_TYPE_OBJREF") Signed-off-by: Arnd Bergmann Reviewed-by: Jens Wiklander Tested-by: Harshal Dev Reviewed-by: Sumit Garg Signed-off-by: Jens Wiklander --- include/uapi/linux/tee.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/tee.h b/include/uapi/linux/tee.h index cab5cadca8ef..5203977ed35d 100644 --- a/include/uapi/linux/tee.h +++ b/include/uapi/linux/tee.h @@ -470,6 +470,7 @@ struct tee_ioctl_object_invoke_arg { __u32 op; __u32 ret; __u32 num_params; + __u32 :32; /* num_params tells the actual number of element in params */ struct tee_ioctl_param params[]; }; -- cgit v1.2.3 From 83eb00f31eb1b10735d48e469df72cc2b0e06f6d Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 27 May 2026 12:21:01 -0700 Subject: hyperv: Clean up and fix the guest ID comment in hvgdk.h Change the "64 bit" to "64-bit", and the "Os" to "OS". Remove the obsolete paragraph since the guideline has been published in the Hypervisor Top Level Functional Specification for many years. The "OS Type" is 0x1 for Linux, not 0x100. No functional change. Fixes: 83ba0c4f3f31 ("Drivers: hv: Cleanup the guest ID computation") Signed-off-by: Dexuan Cui Reviewed-by: Hamza Mahfooz Signed-off-by: Wei Liu --- include/hyperv/hvgdk.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/hyperv/hvgdk.h b/include/hyperv/hvgdk.h index 384c3f3ff4a5..f538144280ca 100644 --- a/include/hyperv/hvgdk.h +++ b/include/hyperv/hvgdk.h @@ -10,18 +10,12 @@ /* * The guest OS needs to register the guest ID with the hypervisor. - * The guest ID is a 64 bit entity and the structure of this ID is + * The guest ID is a 64-bit entity and the structure of this ID is * specified in the Hyper-V TLFS specification. * - * While the current guideline does not specify how Linux guest ID(s) - * need to be generated, our plan is to publish the guidelines for - * Linux and other guest operating systems that currently are hosted - * on Hyper-V. The implementation here conforms to this yet - * unpublished guidelines. - * * Bit(s) * 63 - Indicates if the OS is Open Source or not; 1 is Open Source - * 62:56 - Os Type; Linux is 0x100 + * 62:56 - OS Type; Linux is 0x1 * 55:48 - Distro specific identification * 47:16 - Linux kernel version number * 15:0 - Distro specific identification -- cgit v1.2.3 From 840b740a35bf969734e0f2e44c21289fdd03079e Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Tue, 26 May 2026 07:13:04 -0700 Subject: mshv: Add conditional VMBus dependency When the VMBus driver is not part of the kernel (CONFIG_HYPERV_VMBUS=n), the MSHV root driver fails to link: ERROR: modpost: "hv_vmbus_exists" [drivers/hv/mshv_root.ko] undefined! Fix this while meeting these requirements: * It must be possible to include the MSHV root driver without the VMBus driver. In such case, the MSHV root driver can be built-in to the kernel image, or it can be built as a separate module. * If both the MSHV root driver and the VMBus driver are present, the MSHV root driver and VMBus driver can both be built-in, or they can both be separate modules. Or the MSHV root driver can be a module while the VMBus driver can be built-in, but the reverse is disallowed. Regardless of the build choices, the VMBus driver must be loaded before the MSHV driver in order for the SynIC to be managed properly (see comments in the MSHV SynIC code). The fix has two parts: * Add a Kconfig entry for MSHV_ROOT to depend on HYPERV_VMBUS if HYPERV_VMBUS is present. The entry disallows MSHV_ROOT being built-in when HYPERV_VMBUS is a module, but without requiring that HYPERV_VMBUS be built. * Add a stub implementation of hv_vmbus_exists() for when the VMBus driver is not present so that the MSHV root driver has no module dependency on VMBus. When the VMBus driver *is* present, the module dependency ensures that the VMBus driver loads first when both are built as modules. Existing code ensures that the VMBus driver loads first if it is built-in. The VMBus driver uses subsys_initcall(), which is initcall level 4. The MSHV root driver uses module_init(), which becomes device_init() when built-in, and device_init() is initcall level 6. Reported-by: Arnd Bergmann Closes: https://lore.kernel.org/all/20260520074044.923728-1-arnd@kernel.org/ Signed-off-by: Michael Kelley Acked-by: Arnd Bergmann Reviewed-by: Jork Loeser Reviewed-by: Hardik Garg Signed-off-by: Wei Liu --- drivers/hv/Kconfig | 1 + include/linux/hyperv.h | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig index 2d0b3fcb0ff8..aa11bcefddf2 100644 --- a/drivers/hv/Kconfig +++ b/drivers/hv/Kconfig @@ -74,6 +74,7 @@ config MSHV_ROOT # e.g. When withdrawing memory, the hypervisor gives back 4k pages in # no particular order, making it impossible to reassemble larger pages depends on PAGE_SIZE_4KB + depends on HYPERV_VMBUS if HYPERV_VMBUS select EVENTFD select VIRT_XFER_TO_GUEST_WORK select HMM_MIRROR diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 41a3d82f0722..734b7ef98f4d 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1304,7 +1304,11 @@ static inline void *hv_get_drvdata(struct hv_device *dev) struct device *hv_get_vmbus_root_device(void); +#if IS_ENABLED(CONFIG_HYPERV_VMBUS) bool hv_vmbus_exists(void); +#else +static inline bool hv_vmbus_exists(void) { return false; } +#endif struct hv_ring_buffer_debug_info { u32 current_interrupt_mask; -- cgit v1.2.3 From 3c2d42b8ee345b17a4ba56b0f6492d1ff4c1178e Mon Sep 17 00:00:00 2001 From: Wupeng Ma Date: Fri, 22 May 2026 09:03:05 +0800 Subject: mm/memory-failure: fix hugetlb_lock AA deadlock in get_huge_page_for_hwpoison Two concurrent madvise(MADV_HWPOISON) calls on the same hugetlb page can trigger a recursive spinlock self-deadlock (AA deadlock) on hugetlb_lock when racing with a concurrent unmap: thread#0 thread#1 -------- -------- madvise(folio, MADV_HWPOISON) -> poisons the folio successfully madvise(folio, MADV_HWPOISON) unmap(folio) try_memory_failure_hugetlb get_huge_page_for_hwpoison spin_lock_irq(&hugetlb_lock) <- held __get_huge_page_for_hwpoison hugetlb_update_hwpoison() -> MF_HUGETLB_FOLIO_PRE_POISONED goto out: folio_put() refcount: 1 -> 0 free_huge_folio() spin_lock_irqsave(&hugetlb_lock) -> AA DEADLOCK! The out: path in __get_huge_page_for_hwpoison() calls folio_put() to drop the GUP reference while the hugetlb_lock is still held by the hugetlb.c wrapper get_huge_page_for_hwpoison(). If concurrent unmap has released the page table mapping reference, folio_put() drops the folio refcount to zero, triggering free_huge_folio() which attempts to re-acquire the non-recursive hugetlb_lock. Fix this by moving hugetlb_lock acquisition from the hugetlb.c wrapper into get_huge_page_for_hwpoison(). Place spin_unlock_irq() before the folio_put() at the out: label so the folio is always released outside the lock. [akpm@linux-foundation.org: fix race, rename label per Miaohe] Link: https://sashiko.dev/#/patchset/20260522010305.4099834-1-mawupeng1@huawei.com Link: https://lore.kernel.org/f39f405e-4b4b-8f79-70fe-a2b5b62114eb@huawei.com Link: https://lore.kernel.org/20260522010305.4099834-1-mawupeng1@huawei.com Fixes: 405ce051236c ("mm/hwpoison: fix race between hugetlb free/demotion and memory_failure_hugetlb()") Signed-off-by: Wupeng Ma Acked-by: Oscar Salvador (SUSE) Acked-by: Muchun Song Reviewed-by: Kefeng Wang Acked-by: Miaohe Lin Cc: David Hildenbrand Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Michal Hocko Cc: Mike Rapoport Cc: Naoya Horiguchi Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- include/linux/hugetlb.h | 8 -------- include/linux/mm.h | 8 -------- mm/hugetlb.c | 11 ----------- mm/memory-failure.c | 19 ++++++++++--------- 4 files changed, 10 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 5957bc25efa8..2abaf99321e9 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -153,8 +153,6 @@ long hugetlb_unreserve_pages(struct inode *inode, long start, long end, long freed); bool folio_isolate_hugetlb(struct folio *folio, struct list_head *list); int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison); -int get_huge_page_for_hwpoison(unsigned long pfn, int flags, - bool *migratable_cleared); void folio_putback_hugetlb(struct folio *folio); void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason); void hugetlb_fix_reserve_counts(struct inode *inode); @@ -421,12 +419,6 @@ static inline int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, return 0; } -static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags, - bool *migratable_cleared) -{ - return 0; -} - static inline void folio_putback_hugetlb(struct folio *folio) { } diff --git a/include/linux/mm.h b/include/linux/mm.h index 06bbe9eba636..fc2acedf0b76 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4975,8 +4975,6 @@ extern int soft_offline_page(unsigned long pfn, int flags); */ extern const struct attribute_group memory_failure_attr_group; extern void memory_failure_queue(unsigned long pfn, int flags); -extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, - bool *migratable_cleared); void num_poisoned_pages_inc(unsigned long pfn); void num_poisoned_pages_sub(unsigned long pfn, long i); #else @@ -4984,12 +4982,6 @@ static inline void memory_failure_queue(unsigned long pfn, int flags) { } -static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, - bool *migratable_cleared) -{ - return 0; -} - static inline void num_poisoned_pages_inc(unsigned long pfn) { } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 1b1d4f87a3a4..c921287489de 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -7161,17 +7161,6 @@ int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison return ret; } -int get_huge_page_for_hwpoison(unsigned long pfn, int flags, - bool *migratable_cleared) -{ - int ret; - - spin_lock_irq(&hugetlb_lock); - ret = __get_huge_page_for_hwpoison(pfn, flags, migratable_cleared); - spin_unlock_irq(&hugetlb_lock); - return ret; -} - /** * folio_putback_hugetlb - unisolate a hugetlb folio * @folio: the isolated hugetlb folio diff --git a/mm/memory-failure.c b/mm/memory-failure.c index ee42d4361309..d47aef256a32 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1966,20 +1966,19 @@ void folio_clear_hugetlb_hwpoison(struct folio *folio) folio_free_raw_hwp(folio, true); } -/* - * Called from hugetlb code with hugetlb_lock held. - */ -int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, +static int get_huge_page_for_hwpoison(unsigned long pfn, int flags, bool *migratable_cleared) { struct page *page = pfn_to_page(pfn); - struct folio *folio = page_folio(page); + struct folio *folio; bool count_increased = false; int ret, rc; + spin_lock_irq(&hugetlb_lock); + folio = page_folio(page); if (!folio_test_hugetlb(folio)) { ret = MF_HUGETLB_NON_HUGEPAGE; - goto out; + goto out_unlock; } else if (flags & MF_COUNT_INCREASED) { ret = MF_HUGETLB_IN_USED; count_increased = true; @@ -1995,13 +1994,13 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, } else { ret = MF_HUGETLB_RETRY; if (!(flags & MF_NO_RETRY)) - goto out; + goto out_unlock; } rc = hugetlb_update_hwpoison(folio, page); if (rc >= MF_HUGETLB_FOLIO_PRE_POISONED) { ret = rc; - goto out; + goto out_unlock; } /* @@ -2013,8 +2012,10 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, *migratable_cleared = true; } + spin_unlock_irq(&hugetlb_lock); return ret; -out: +out_unlock: + spin_unlock_irq(&hugetlb_lock); if (count_increased) folio_put(folio); return ret; -- cgit v1.2.3 From 193989cc6d80dd8e0460fb3992e69fa03bf0ff9b Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Mon, 25 May 2026 07:07:44 +0300 Subject: ipvs: clear the svc scheduler ptr early on edit ip_vs_edit_service() while unbinding the old scheduler clears the svc->scheduler ptr after the scheduler module initiates RCU callbacks. This can cause packets to use the old scheduler at the time when svc->sched_data is already freed after RCU grace period. Fix it by clearing the ptr early in ip_vs_unbind_scheduler(), before the done_service method schedules any RCU callbacks. Also, if the new scheduler fails to initialize when replacing the old scheduler, try to restore the old scheduler while still returning the error code. Link: https://sashiko.dev/#/patchset/20260519015506.634185-1-rosenp%40gmail.com Fixes: 05f00505a89a ("ipvs: fix crash if scheduler is changed") Signed-off-by: Julian Anastasov Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/ip_vs.h | 3 +-- net/netfilter/ipvs/ip_vs_ctl.c | 13 ++++++++----- net/netfilter/ipvs/ip_vs_sched.c | 14 +++++++------- 3 files changed, 16 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index a02e569813d2..e517eaaa177b 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1824,8 +1824,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler); int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler); int ip_vs_bind_scheduler(struct ip_vs_service *svc, struct ip_vs_scheduler *scheduler); -void ip_vs_unbind_scheduler(struct ip_vs_service *svc, - struct ip_vs_scheduler *sched); +void ip_vs_unbind_scheduler(struct ip_vs_service *svc); struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name); void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler); struct ip_vs_conn * diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index bd9cae44d214..16daba8cac83 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1898,7 +1898,7 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, if (ret_hooks >= 0) ip_vs_unregister_hooks(ipvs, u->af); if (svc != NULL) { - ip_vs_unbind_scheduler(svc, sched); + ip_vs_unbind_scheduler(svc); ip_vs_service_free(svc); } ip_vs_scheduler_put(sched); @@ -1962,9 +1962,8 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) old_sched = rcu_dereference_protected(svc->scheduler, 1); if (sched != old_sched) { if (old_sched) { - ip_vs_unbind_scheduler(svc, old_sched); - RCU_INIT_POINTER(svc->scheduler, NULL); - /* Wait all svc->sched_data users */ + ip_vs_unbind_scheduler(svc); + /* Wait all svc->scheduler/sched_data users */ synchronize_rcu(); } /* Bind the new scheduler */ @@ -1972,6 +1971,10 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) ret = ip_vs_bind_scheduler(svc, sched); if (ret) { ip_vs_scheduler_put(sched); + /* Try to restore the old_sched */ + if (old_sched && + !ip_vs_bind_scheduler(svc, old_sched)) + old_sched = NULL; goto out; } } @@ -2027,7 +2030,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) /* Unbind scheduler */ old_sched = rcu_dereference_protected(svc->scheduler, 1); - ip_vs_unbind_scheduler(svc, old_sched); + ip_vs_unbind_scheduler(svc); ip_vs_scheduler_put(old_sched); /* Unbind persistence engine, keep svc->pe */ diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c index c6e421c4e299..24adc38942a0 100644 --- a/net/netfilter/ipvs/ip_vs_sched.c +++ b/net/netfilter/ipvs/ip_vs_sched.c @@ -56,19 +56,19 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc, /* * Unbind a service with its scheduler */ -void ip_vs_unbind_scheduler(struct ip_vs_service *svc, - struct ip_vs_scheduler *sched) +void ip_vs_unbind_scheduler(struct ip_vs_service *svc) { - struct ip_vs_scheduler *cur_sched; + struct ip_vs_scheduler *sched; - cur_sched = rcu_dereference_protected(svc->scheduler, 1); - /* This check proves that old 'sched' was installed */ - if (!cur_sched) + sched = rcu_dereference_protected(svc->scheduler, 1); + if (!sched) return; + /* Reset the scheduler before initiating any RCU callbacks */ + rcu_assign_pointer(svc->scheduler, NULL); + smp_wmb(); /* paired with smp_rmb() in ip_vs_schedule() */ if (sched->done_service) sched->done_service(svc); - /* svc->scheduler can be set to NULL only by caller */ } -- cgit v1.2.3 From 5057e1aca011e51ef51498c940ef96f3d3e8a305 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Sun, 31 May 2026 12:08:12 -0400 Subject: net/sched: act_api: use RCU with deferred freeing for action lifecycle When NEWTFILTER and DELFILTER are run concurrently it is possible to create a race with an associated action. Let's illustrate with CPU0 running NEWTFILTER and CPU1 running DELFILTER: 0: mutex_lock() <-- holds the idr lock 0: rcu_read_lock() 0: p = idr_find(idr, index) <-- action p is valid (RCU protects IDR) 0: mutex_unlock() <-- releases the idr lock 1: refcount_dec_and_mutex_lock() <-- refcnt 1->0, mutex held 1: idr_remove(idr, index) <-- Action removed from IDR 1: mutex_unlock() <-- mutex released allowing us to delete the action 1: tcf_action_cleanup(p); kfree(p) <-- Kfrees p immediately, no deferral 0: refcount_inc_not_zero(&p->tcfa_refcnt) <-- ouch, UAF p points to freed memory This patch fixes the race condition between NEWTFILTER and DELFILTER by adding struct rcu_head to tc_action used in the deferral and introducing a call_rcu() in the delete path to defer the final kfree(). Note: this is a revert of commit d7fb60b9cafb ("net_sched: get rid of tcfa_rcu") but also modernization/simplification to directly use kfree_rcu(). Let's illustrate the new restored code path: 0: rcu_read_lock() 1: refcount_dec_and_mutex_lock() <-- refcnt 1->0, mutex held 1: idr_remove(idr, index) 1: mutex_unlock() 1: call_rcu(&p->tcfa_rcu, tcf_action_rcu_free) <-- defer kfree after grace period 0: p = idr_find(idr, index) 0: refcount_inc_not_zero(&p->tcfa_refcnt) <-- fails, refcnt already 0 1: rcu_read_unlock() <-- release so freeing can run after grace period After CPU1 calls idr_remove(), the object is no longer reachable through the IDR. CPU0's subsequent idr_find() will return NULL, and even if it still held a stale pointer, the immediate kfree() is now deferred until after the RCU grace period, so no UAF can occur. Fixes: d7fb60b9cafb ("net_sched: get rid of tcfa_rcu") Suggested-by: Jakub Kicinski Reported-by: Kyle Zeng Tested-by: Victor Nogueira Tested-by: syzbot@syzkaller.appspotmail.com Signed-off-by: Jamal Hadi Salim Tested-by: Kyle Zeng Reviewed-by: Pedro Tammela Reviewed-by: Eric Dumazet Reviewed-by: Victor Nogueira Link: https://patch.msgid.link/20260531160812.68020-1-jhs@mojatatu.com Signed-off-by: Jakub Kicinski --- include/net/act_api.h | 1 + net/sched/act_api.c | 7 +------ 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index d11b79107930..fd2967ee08f7 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -45,6 +45,7 @@ struct tc_action { struct tc_cookie __rcu *user_cookie; struct tcf_chain __rcu *goto_chain; u32 tcfa_flags; + struct rcu_head tcfa_rcu; u8 hw_stats; u8 used_hw_stats; bool used_hw_stats_valid; diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 332fd9695e54..04ea11c90e03 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -112,11 +112,6 @@ struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action, } EXPORT_SYMBOL(tcf_action_set_ctrlact); -/* XXX: For standalone actions, we don't need a RCU grace period either, because - * actions are always connected to filters and filters are already destroyed in - * RCU callbacks, so after a RCU grace period actions are already disconnected - * from filters. Readers later can not find us. - */ static void free_tcf(struct tc_action *p) { struct tcf_chain *chain = rcu_dereference_protected(p->goto_chain, 1); @@ -129,7 +124,7 @@ static void free_tcf(struct tc_action *p) if (chain) tcf_chain_put_by_act(chain); - kfree(p); + kfree_rcu(p, tcfa_rcu); } static void offload_action_hw_count_set(struct tc_action *act, -- cgit v1.2.3 From 6d99479799c69c3cb588fcda19c81d8f61d64ecd Mon Sep 17 00:00:00 2001 From: Qing Wang Date: Tue, 2 Jun 2026 11:08:54 +0800 Subject: rseq: Fix using an uninitialized stack variable in rseq_exit_user_update() There is an bug in which an uninitialized stack variable is used in rseq_exit_user_update() as reported by syzbot: BUG: KMSAN: kernel-infoleak in rseq_set_ids_get_csaddr include/linux/rseq_entry.h:502 [inline] The local variable: struct rseq_ids ids = { .cpu_id = task_cpu(t), .mm_cid = task_mm_cid(t), .node_id = cpu_to_node(ids.cpu_id), }; According to the C standard, the evaluation order of expressions in an initializer list is indeterminately sequenced. The compiler (Clang, in this KMSAN build) evaluates `cpu_to_node(ids.cpu_id)` *before* `ids.cpu_id` is initialized with `task_cpu(t)`. This is fixed by moving the assignment of ids.node_id outside the structure initialization. Fixes: 82f572449cfe ("rseq: Implement read only ABI enforcement for optimized RSEQ V2 mode") Closes: https://syzkaller.appspot.com/bug?extid=185a631927096f9da2fc Reported-by: syzbot+185a631927096f9da2fc@syzkaller.appspotmail.com Signed-off-by: Qing Wang Signed-off-by: Peter Zijlstra (Intel) Acked-by: Mark Rutland Link: https://patch.msgid.link/20260602030854.574038-1-wangqing7171@gmail.com --- include/linux/rseq_entry.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/rseq_entry.h b/include/linux/rseq_entry.h index 63bc72086e75..ed9da6e41a2a 100644 --- a/include/linux/rseq_entry.h +++ b/include/linux/rseq_entry.h @@ -635,10 +635,11 @@ static __always_inline bool rseq_exit_user_update(struct pt_regs *regs, struct t return true; } + int cpu = task_cpu(t); struct rseq_ids ids = { - .cpu_id = task_cpu(t), + .cpu_id = cpu, .mm_cid = task_mm_cid(t), - .node_id = cpu_to_node(ids.cpu_id), + .node_id = cpu_to_node(cpu), }; return rseq_update_usr(t, regs, &ids); -- cgit v1.2.3 From e32d7f404d7d9dac307c1cd9a1fe132fa62ab6d6 Mon Sep 17 00:00:00 2001 From: Gabriele Monaco Date: Mon, 1 Jun 2026 17:38:29 +0200 Subject: rv: Reset per-task DA monitors before releasing the slot Per-task monitors use task_mon_slot to determine which slot in the array to use for the monitor. During destruction, this slot is returned but this is done before resetting the monitor. As a result, the monitor's reset is in fact resetting a slot that is outside of the array (RV_PER_TASK_MONITOR_INIT). Release the slot only after the reset to avoid out-of-bound memory access. Fixes: f5587d1b6ec93 ("rv: Add Hybrid Automata monitor type") Cc: stable@vger.kernel.org Suggested-by: Wen Yang Reviewed-by: Wen Yang Reviewed-by: Nam Cao Link: https://lore.kernel.org/r/20260601153840.124372-3-gmonaco@redhat.com Signed-off-by: Gabriele Monaco --- include/rv/da_monitor.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/rv/da_monitor.h b/include/rv/da_monitor.h index 39765ff6f098..1459fb3dfee6 100644 --- a/include/rv/da_monitor.h +++ b/include/rv/da_monitor.h @@ -309,10 +309,11 @@ static inline void da_monitor_destroy(void) WARN_ONCE(1, "Disabling a disabled monitor: " __stringify(MONITOR_NAME)); return; } - rv_put_task_monitor_slot(task_mon_slot); - task_mon_slot = RV_PER_TASK_MONITOR_INIT; da_monitor_reset_all(); + + rv_put_task_monitor_slot(task_mon_slot); + task_mon_slot = RV_PER_TASK_MONITOR_INIT; } #elif RV_MON_TYPE == RV_MON_PER_OBJ -- cgit v1.2.3 From c3d016ea823a9941ab8cbcef01a500821ff0cf16 Mon Sep 17 00:00:00 2001 From: Gabriele Monaco Date: Mon, 1 Jun 2026 17:38:30 +0200 Subject: rv: Prevent in-flight per-task handlers from using invalid slots Per-task monitors use a slot in the task_struct->rv[] array and store that locally (e.g. task_mon_slot), this slot is returned during the destruction process but currently hanlers can be running while that slot is returning and this race may lead to accessing an invalid slot. Synchronise with all in-flight tracepoint handlers using tracepoint_synchronize_unregister() before returning the slot. Fixes: f5587d1b6ec9 ("rv: Add Hybrid Automata monitor type") Fixes: a9769a5b9878 ("rv: Add support for LTL monitors") Suggested-by: Wen Yang Reviewed-by: Nam Cao Link: https://lore.kernel.org/r/20260601153840.124372-4-gmonaco@redhat.com Signed-off-by: Gabriele Monaco --- include/rv/da_monitor.h | 4 ++++ include/rv/ltl_monitor.h | 1 + 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/rv/da_monitor.h b/include/rv/da_monitor.h index 1459fb3dfee6..cc97cc5dfbfd 100644 --- a/include/rv/da_monitor.h +++ b/include/rv/da_monitor.h @@ -302,6 +302,9 @@ static int da_monitor_init(void) /* * da_monitor_destroy - return the allocated slot + * + * Wait for all in-flight handlers before returning the slot to avoid + * out-of-bound accesses. */ static inline void da_monitor_destroy(void) { @@ -310,6 +313,7 @@ static inline void da_monitor_destroy(void) return; } + tracepoint_synchronize_unregister(); da_monitor_reset_all(); rv_put_task_monitor_slot(task_mon_slot); diff --git a/include/rv/ltl_monitor.h b/include/rv/ltl_monitor.h index eff60cd61106..38e792401f76 100644 --- a/include/rv/ltl_monitor.h +++ b/include/rv/ltl_monitor.h @@ -77,6 +77,7 @@ static void ltl_monitor_destroy(void) { rv_detach_trace_probe(name, task_newtask, handle_task_newtask); + tracepoint_synchronize_unregister(); rv_put_task_monitor_slot(ltl_monitor_slot); ltl_monitor_slot = RV_PER_TASK_MONITOR_INIT; } -- cgit v1.2.3 From 32171d828ab964dc1f05f2056a81477522a3361b Mon Sep 17 00:00:00 2001 From: Gabriele Monaco Date: Mon, 1 Jun 2026 17:38:31 +0200 Subject: rv: Ensure all pending probes terminate on per-obj monitor destroy The monitor disable/destroy sequence detaches all probes and resets the monitor's data, however it doesn't wait for pending probes. This is an issue with per-object monitors, which free the monitor storage. Call tracepoint_synchronize_unregister() to make sure to wait for all pending probes before destroying the monitor storage. Fixes: 4a24127bd6cb ("rv: Add support for per-object monitors in DA/HA") Reviewed-by: Wen Yang Reviewed-by: Nam Cao Link: https://lore.kernel.org/r/20260601153840.124372-5-gmonaco@redhat.com Signed-off-by: Gabriele Monaco --- include/rv/da_monitor.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/rv/da_monitor.h b/include/rv/da_monitor.h index cc97cc5dfbfd..a7e103654406 100644 --- a/include/rv/da_monitor.h +++ b/include/rv/da_monitor.h @@ -511,9 +511,10 @@ static inline void da_monitor_destroy(void) struct hlist_node *tmp; int bkt; + tracepoint_synchronize_unregister(); /* - * This function is called after all probes are disabled, we need only - * worry about concurrency against old events. + * This function is called after all probes are disabled and no longer + * pending, we can safely assume no concurrent user. */ synchronize_rcu(); hash_for_each_safe(da_monitor_ht, bkt, tmp, mon_storage, node) { -- cgit v1.2.3 From 2b9a8c27417eeef01967c6297def426a44776c04 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Mon, 1 Jun 2026 17:38:32 +0200 Subject: rv: Fix monitor start ordering and memory ordering for monitoring flag da_monitor_start() set monitoring=1 before calling da_monitor_init_hook(), may racing with the sched_switch handler: da_monitor_start() sched_switch handler ------------------------- --------------------------------- da_mon->monitoring = 1; if (da_monitoring(da_mon)) /* true */ ha_start_timer_ns(...); /* hrtimer->base == NULL, crash */ da_monitor_init_hook(da_mon); /* hrtimer_setup() sets base */ Fix the ordering and pair with release/acquire semantics: da_monitor_init_hook(da_mon); smp_store_release(&da_mon->monitoring, 1); /* da_monitor_start() */ return smp_load_acquire(&da_mon->monitoring); /* da_monitoring() */ On ARM64 a plain STR + LDR does not form a release-acquire pair, so the load can observe monitoring=1 while hrtimer->base is still NULL. The plain accesses are also data races under KCSAN. Use WRITE_ONCE for the monitoring=0 store in da_monitor_reset() to cover the reset path. Fixes: 792575348ff7 ("rv/include: Add deterministic automata monitor definition via C macros") Signed-off-by: Wen Yang Reviewed-by: Gabriele Monaco Reviewed-by: Nam Cao Link: https://lore.kernel.org/r/20260601153840.124372-6-gmonaco@redhat.com Signed-off-by: Gabriele Monaco --- include/rv/da_monitor.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/rv/da_monitor.h b/include/rv/da_monitor.h index a7e103654406..60dc39f2620a 100644 --- a/include/rv/da_monitor.h +++ b/include/rv/da_monitor.h @@ -82,7 +82,7 @@ static void react(enum states curr_state, enum events event) static inline void da_monitor_reset(struct da_monitor *da_mon) { da_monitor_reset_hook(da_mon); - da_mon->monitoring = 0; + WRITE_ONCE(da_mon->monitoring, 0); da_mon->curr_state = model_get_initial_state(); } @@ -95,8 +95,9 @@ static inline void da_monitor_reset(struct da_monitor *da_mon) static inline void da_monitor_start(struct da_monitor *da_mon) { da_mon->curr_state = model_get_initial_state(); - da_mon->monitoring = 1; da_monitor_init_hook(da_mon); + /* Pairs with smp_load_acquire in da_monitoring(). */ + smp_store_release(&da_mon->monitoring, 1); } /* @@ -104,7 +105,8 @@ static inline void da_monitor_start(struct da_monitor *da_mon) */ static inline bool da_monitoring(struct da_monitor *da_mon) { - return da_mon->monitoring; + /* Pairs with smp_store_release in da_monitor_start(). */ + return smp_load_acquire(&da_mon->monitoring); } /* -- cgit v1.2.3 From 713c9e1ea7623846f8e0bb657f1b9d211d1a61e6 Mon Sep 17 00:00:00 2001 From: Gabriele Monaco Date: Mon, 1 Jun 2026 17:38:33 +0200 Subject: rv: Do not rely on clean monitor when initialising HA Hybrid Automata monitors hook into the DA implementation when doing da_monitor_reset(). This function is called both on initialisation and teardown, HA monitors try to cancel a timer only when it's initialised relying on the da_mon->monitoring flag. This flag could however be corrupted during initialisation. This happens for instance on per-task monitors that share the same storage with different type of monitors like LTL or in case of races during a previous teardown. Stop relying on the monitoring flag during initialisation, assume that can have any value, so use a separate da_reset_state() skiping timer cancellation. New monitors (e.g. new tasks) are always zero-initialised so it is safe to rely on the monitoring flag for those. Reported-by: Wen Yang Closes: https://lore.kernel.org/lkml/d02c656aada7d071f083460a5c9a454363669b61.1778522945.git.wen.yang@linux.dev Suggested-by: Nam Cao Fixes: f5587d1b6ec9 ("rv: Add Hybrid Automata monitor type") Reviewed-by: Wen Yang Reviewed-by: Nam Cao Link: https://lore.kernel.org/r/20260601153840.124372-7-gmonaco@redhat.com Signed-off-by: Gabriele Monaco --- include/rv/da_monitor.h | 91 ++++++++++++++++++++++++++++++++++++++++--------- include/rv/ha_monitor.h | 2 +- 2 files changed, 76 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/rv/da_monitor.h b/include/rv/da_monitor.h index 60dc39f2620a..ec9bc88bd4c4 100644 --- a/include/rv/da_monitor.h +++ b/include/rv/da_monitor.h @@ -76,14 +76,22 @@ static void react(enum states curr_state, enum events event) model_get_state_name(curr_state)); } +/* + * da_monitor_reset_state - reset a monitor and setting it to init state + */ +static inline void da_monitor_reset_state(struct da_monitor *da_mon) +{ + WRITE_ONCE(da_mon->monitoring, 0); + da_mon->curr_state = model_get_initial_state(); +} + /* * da_monitor_reset - reset a monitor and setting it to init state */ static inline void da_monitor_reset(struct da_monitor *da_mon) { da_monitor_reset_hook(da_mon); - WRITE_ONCE(da_mon->monitoring, 0); - da_mon->curr_state = model_get_initial_state(); + da_monitor_reset_state(da_mon); } /* @@ -158,12 +166,28 @@ static struct da_monitor *da_get_monitor(void) return &DA_MON_NAME; } +/* + * __da_monitor_reset_all - reset the single monitor + */ +static void __da_monitor_reset_all(void (*reset)(struct da_monitor *)) +{ + reset(da_get_monitor()); +} + /* * da_monitor_reset_all - reset the single monitor */ static void da_monitor_reset_all(void) { - da_monitor_reset(da_get_monitor()); + __da_monitor_reset_all(da_monitor_reset); +} + +/* + * da_monitor_reset_state_all - reset the single monitor + */ +static inline void da_monitor_reset_state_all(void) +{ + __da_monitor_reset_all(da_monitor_reset_state); } /* @@ -171,7 +195,7 @@ static void da_monitor_reset_all(void) */ static inline int da_monitor_init(void) { - da_monitor_reset_all(); + da_monitor_reset_state_all(); return 0; } @@ -202,25 +226,41 @@ static struct da_monitor *da_get_monitor(void) } /* - * da_monitor_reset_all - reset all CPUs' monitor + * __da_monitor_reset_all - reset all CPUs' monitor */ -static void da_monitor_reset_all(void) +static void __da_monitor_reset_all(void (*reset)(struct da_monitor *)) { struct da_monitor *da_mon; int cpu; for_each_cpu(cpu, cpu_online_mask) { da_mon = per_cpu_ptr(&DA_MON_NAME, cpu); - da_monitor_reset(da_mon); + reset(da_mon); } } +/* + * da_monitor_reset_all - reset all CPUs' monitor + */ +static void da_monitor_reset_all(void) +{ + __da_monitor_reset_all(da_monitor_reset); +} + +/* + * da_monitor_reset_state_all - reset all CPUs' monitor + */ +static inline void da_monitor_reset_state_all(void) +{ + __da_monitor_reset_all(da_monitor_reset_state); +} + /* * da_monitor_init - initialize all CPUs' monitor */ static inline int da_monitor_init(void) { - da_monitor_reset_all(); + da_monitor_reset_state_all(); return 0; } @@ -269,19 +309,29 @@ static inline da_id_type da_get_id(struct da_monitor *da_mon) return da_get_target(da_mon)->pid; } -static void da_monitor_reset_all(void) +static void __da_monitor_reset_all(void (*reset)(struct da_monitor *)) { struct task_struct *g, *p; int cpu; read_lock(&tasklist_lock); for_each_process_thread(g, p) - da_monitor_reset(da_get_monitor(p)); + reset(da_get_monitor(p)); for_each_present_cpu(cpu) - da_monitor_reset(da_get_monitor(idle_task(cpu))); + reset(da_get_monitor(idle_task(cpu))); read_unlock(&tasklist_lock); } +static void da_monitor_reset_all(void) +{ + __da_monitor_reset_all(da_monitor_reset); +} + +static inline void da_monitor_reset_state_all(void) +{ + __da_monitor_reset_all(da_monitor_reset_state); +} + /* * da_monitor_init - initialize the per-task monitor * @@ -298,7 +348,7 @@ static int da_monitor_init(void) task_mon_slot = slot; - da_monitor_reset_all(); + da_monitor_reset_state_all(); return 0; } @@ -490,15 +540,24 @@ static inline void da_destroy_storage(da_id_type id) kfree_rcu(mon_storage, rcu); } -static void da_monitor_reset_all(void) +static void __da_monitor_reset_all(void (*reset)(struct da_monitor *)) { struct da_monitor_storage *mon_storage; int bkt; - rcu_read_lock(); + guard(rcu)(); hash_for_each_rcu(da_monitor_ht, bkt, mon_storage, node) - da_monitor_reset(&mon_storage->rv.da_mon); - rcu_read_unlock(); + reset(&mon_storage->rv.da_mon); +} + +static void da_monitor_reset_all(void) +{ + __da_monitor_reset_all(da_monitor_reset); +} + +static inline void da_monitor_reset_state_all(void) +{ + __da_monitor_reset_all(da_monitor_reset_state); } static inline int da_monitor_init(void) diff --git a/include/rv/ha_monitor.h b/include/rv/ha_monitor.h index d59507e8cb30..bd87055567cc 100644 --- a/include/rv/ha_monitor.h +++ b/include/rv/ha_monitor.h @@ -153,12 +153,12 @@ static inline void ha_monitor_init_env(struct da_monitor *da_mon) * Called from a hook in the DA reset functions, it supplies the da_mon * corresponding to the current ha_mon. * Not all hybrid automata require the timer, still clear it for simplicity. + * Monitors that never started have their timer uninitialized, do not stop those. */ static inline void ha_monitor_reset_env(struct da_monitor *da_mon) { struct ha_monitor *ha_mon = to_ha_monitor(da_mon); - /* Initialisation resets the monitor before initialising the timer */ if (likely(da_monitoring(da_mon))) ha_cancel_timer(ha_mon); } -- cgit v1.2.3 From 700782ec8f6589c5792b323efd6e004dd183328b Mon Sep 17 00:00:00 2001 From: Gabriele Monaco Date: Mon, 1 Jun 2026 17:38:34 +0200 Subject: rv: Add automatic cleanup handlers for per-task HA monitors Hybrid automata monitors may start timers, depending on the model, these may remain active on an exiting task and cause false positives or even access freed memory. Add an enable/disable hook in the HA code, currently only populated by the per-task handler for registration and deregistration. This hooks to the sched_process_exit event and ensures the timer is stopped for every exiting task. The handler is enabled automatically but may be disabled, for instance if the monitor uses the event for another purpose (but should still manually ensure timers are stopped). Fixes: f5587d1b6ec9 ("rv: Add Hybrid Automata monitor type") Reviewed-by: Nam Cao Link: https://lore.kernel.org/r/20260601153840.124372-8-gmonaco@redhat.com Signed-off-by: Gabriele Monaco --- include/rv/ha_monitor.h | 60 ++++++++++++++++++++++ kernel/trace/rv/monitors/nomiss/nomiss.c | 4 +- kernel/trace/rv/monitors/opid/opid.c | 4 +- kernel/trace/rv/monitors/stall/stall.c | 4 +- .../rvgen/rvgen/templates/dot2k/main.c | 4 +- 5 files changed, 68 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/rv/ha_monitor.h b/include/rv/ha_monitor.h index bd87055567cc..4002b5247c46 100644 --- a/include/rv/ha_monitor.h +++ b/include/rv/ha_monitor.h @@ -28,6 +28,7 @@ static inline void ha_monitor_init_env(struct da_monitor *da_mon); static inline void ha_monitor_reset_env(struct da_monitor *da_mon); static inline void ha_setup_timer(struct ha_monitor *ha_mon); static inline bool ha_cancel_timer(struct ha_monitor *ha_mon); +static inline void ha_cancel_timer_sync(struct ha_monitor *ha_mon); static bool ha_monitor_handle_constraint(struct da_monitor *da_mon, enum states curr_state, enum events event, @@ -37,6 +38,26 @@ static bool ha_monitor_handle_constraint(struct da_monitor *da_mon, #define da_monitor_init_hook ha_monitor_init_env #define da_monitor_reset_hook ha_monitor_reset_env +#if !defined(HA_SKIP_AUTO_CLEANUP) && RV_MON_TYPE == RV_MON_PER_TASK +/* + * Automatic cleanup handlers for per-task HA monitors, only skip if you know + * what you are doing (e.g. you want to implement cleanup manually in another + * handler doing more things). + */ +static void ha_handle_sched_process_exit(void *data, struct task_struct *p, + bool group_dead); + +#define ha_monitor_enable_hook() \ + rv_attach_trace_probe(__stringify(MONITOR_NAME), sched_process_exit, \ + ha_handle_sched_process_exit) +#define ha_monitor_disable_hook() \ + rv_detach_trace_probe(__stringify(MONITOR_NAME), sched_process_exit, \ + ha_handle_sched_process_exit) +#else +#define ha_monitor_enable_hook() ((void)0) +#define ha_monitor_disable_hook() ((void)0) +#endif + #include #include @@ -115,6 +136,22 @@ static enum hrtimer_restart ha_monitor_timer_callback(struct hrtimer *hrtimer); #define ha_get_ns() 0 #endif /* HA_CLK_NS */ +static int ha_monitor_init(void) +{ + int ret; + + ret = da_monitor_init(); + if (ret == 0) + ha_monitor_enable_hook(); + return ret; +} + +static void ha_monitor_destroy(void) +{ + ha_monitor_disable_hook(); + da_monitor_destroy(); +} + /* Should be supplied by the monitor */ static u64 ha_get_env(struct ha_monitor *ha_mon, enum envs env, u64 time_ns); static bool ha_verify_constraint(struct ha_monitor *ha_mon, @@ -200,6 +237,20 @@ static inline void ha_trace_error_env(struct ha_monitor *ha_mon, { CONCATENATE(trace_error_env_, MONITOR_NAME)(id, curr_state, event, env); } + +#if !defined(HA_SKIP_AUTO_CLEANUP) && RV_MON_TYPE == RV_MON_PER_TASK +static void ha_handle_sched_process_exit(void *data, struct task_struct *p, + bool group_dead) +{ + struct da_monitor *da_mon = da_get_monitor(p); + + if (likely(da_monitoring(da_mon))) { + da_monitor_reset(da_mon); + ha_cancel_timer_sync(to_ha_monitor(da_mon)); + } +} +#endif + #endif /* RV_MON_TYPE */ /* @@ -412,6 +463,10 @@ static inline bool ha_cancel_timer(struct ha_monitor *ha_mon) { return timer_delete(&ha_mon->timer); } +static inline void ha_cancel_timer_sync(struct ha_monitor *ha_mon) +{ + timer_delete_sync(&ha_mon->timer); +} #elif HA_TIMER_TYPE == HA_TIMER_HRTIMER /* * Helper functions to handle the monitor timer. @@ -463,6 +518,10 @@ static inline bool ha_cancel_timer(struct ha_monitor *ha_mon) { return hrtimer_try_to_cancel(&ha_mon->hrtimer) == 1; } +static inline void ha_cancel_timer_sync(struct ha_monitor *ha_mon) +{ + hrtimer_cancel(&ha_mon->hrtimer); +} #else /* HA_TIMER_NONE */ /* * Start function is intentionally not defined, monitors using timers must @@ -473,6 +532,7 @@ static inline bool ha_cancel_timer(struct ha_monitor *ha_mon) { return false; } +static inline void ha_cancel_timer_sync(struct ha_monitor *ha_mon) { } #endif #endif diff --git a/kernel/trace/rv/monitors/nomiss/nomiss.c b/kernel/trace/rv/monitors/nomiss/nomiss.c index 31f90f3638d8..8ead8783c29f 100644 --- a/kernel/trace/rv/monitors/nomiss/nomiss.c +++ b/kernel/trace/rv/monitors/nomiss/nomiss.c @@ -227,7 +227,7 @@ static int enable_nomiss(void) { int retval; - retval = da_monitor_init(); + retval = ha_monitor_init(); if (retval) return retval; @@ -263,7 +263,7 @@ static void disable_nomiss(void) rv_detach_trace_probe("nomiss", sched_switch, handle_sched_switch); rv_detach_trace_probe("nomiss", sched_wakeup, handle_sched_wakeup); - da_monitor_destroy(); + ha_monitor_destroy(); } static struct rv_monitor rv_this = { diff --git a/kernel/trace/rv/monitors/opid/opid.c b/kernel/trace/rv/monitors/opid/opid.c index 4594c7c46601..2922318c6112 100644 --- a/kernel/trace/rv/monitors/opid/opid.c +++ b/kernel/trace/rv/monitors/opid/opid.c @@ -73,7 +73,7 @@ static int enable_opid(void) { int retval; - retval = da_monitor_init(); + retval = ha_monitor_init(); if (retval) return retval; @@ -90,7 +90,7 @@ static void disable_opid(void) rv_detach_trace_probe("opid", sched_set_need_resched_tp, handle_sched_need_resched); rv_detach_trace_probe("opid", sched_waking, handle_sched_waking); - da_monitor_destroy(); + ha_monitor_destroy(); } /* diff --git a/kernel/trace/rv/monitors/stall/stall.c b/kernel/trace/rv/monitors/stall/stall.c index 9ccfda6b0e73..3c38fb1a0159 100644 --- a/kernel/trace/rv/monitors/stall/stall.c +++ b/kernel/trace/rv/monitors/stall/stall.c @@ -103,7 +103,7 @@ static int enable_stall(void) { int retval; - retval = da_monitor_init(); + retval = ha_monitor_init(); if (retval) return retval; @@ -120,7 +120,7 @@ static void disable_stall(void) rv_detach_trace_probe("stall", sched_switch, handle_sched_switch); rv_detach_trace_probe("stall", sched_wakeup, handle_sched_wakeup); - da_monitor_destroy(); + ha_monitor_destroy(); } static struct rv_monitor rv_this = { diff --git a/tools/verification/rvgen/rvgen/templates/dot2k/main.c b/tools/verification/rvgen/rvgen/templates/dot2k/main.c index bf0999f6657a..889446760e3c 100644 --- a/tools/verification/rvgen/rvgen/templates/dot2k/main.c +++ b/tools/verification/rvgen/rvgen/templates/dot2k/main.c @@ -35,7 +35,7 @@ static int enable_%%MODEL_NAME%%(void) { int retval; - retval = da_monitor_init(); + retval = %%MONITOR_CLASS%%_monitor_init(); if (retval) return retval; @@ -50,7 +50,7 @@ static void disable_%%MODEL_NAME%%(void) %%TRACEPOINT_DETACH%% - da_monitor_destroy(); + %%MONITOR_CLASS%%_monitor_destroy(); } /* -- cgit v1.2.3 From 74e17bd6fc8ed7e30363bb78d4c50b38cfd71efe Mon Sep 17 00:00:00 2001 From: Gabriele Monaco Date: Mon, 1 Jun 2026 17:38:35 +0200 Subject: rv: Ensure synchronous cleanup for HA monitors HA monitors may start timers, all cleanup functions currently stop the timers asynchronously to avoid sleeping in the wrong context. Nothing makes sure running callbacks terminate on cleanup. Run the entire HA timer callback in an RCU read-side critical section, this way we can simply synchronize_rcu() with any pending timer and are sure any cleanup using kfree_rcu() runs after callbacks terminated. Additionally make sure any unlikely callback running late won't run any code if the monitor is marked as disabled or if destruction started. Use memory barriers to serialise with racing resets. Fixes: f5587d1b6ec9 ("rv: Add Hybrid Automata monitor type") Fixes: 4a24127bd6cb ("rv: Add support for per-object monitors in DA/HA") Reviewed-by: Nam Cao Link: https://lore.kernel.org/r/20260601153840.124372-9-gmonaco@redhat.com Signed-off-by: Gabriele Monaco --- include/rv/da_monitor.h | 19 ++++++++++++++++--- include/rv/ha_monitor.h | 29 ++++++++++++++++++++++++++--- 2 files changed, 42 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/rv/da_monitor.h b/include/rv/da_monitor.h index ec9bc88bd4c4..1f440c7818e6 100644 --- a/include/rv/da_monitor.h +++ b/include/rv/da_monitor.h @@ -57,6 +57,15 @@ static struct rv_monitor rv_this; #define da_monitor_reset_hook(da_mon) #endif +/* + * Hook to allow the implementation of hybrid automata: define it with a + * function that waits for the termination of all monitors background + * activities (e.g. all timers). This hook can sleep. + */ +#ifndef da_monitor_sync_hook +#define da_monitor_sync_hook() +#endif + /* * Type for the target id, default to int but can be overridden. * A long type can work as hash table key (PER_OBJ) but will be downgraded to @@ -82,7 +91,8 @@ static void react(enum states curr_state, enum events event) static inline void da_monitor_reset_state(struct da_monitor *da_mon) { WRITE_ONCE(da_mon->monitoring, 0); - da_mon->curr_state = model_get_initial_state(); + /* Pair with load in __ha_monitor_timer_callback */ + smp_store_release(&da_mon->curr_state, model_get_initial_state()); } /* @@ -205,6 +215,7 @@ static inline int da_monitor_init(void) static inline void da_monitor_destroy(void) { da_monitor_reset_all(); + da_monitor_sync_hook(); } #elif RV_MON_TYPE == RV_MON_PER_CPU @@ -270,6 +281,7 @@ static inline int da_monitor_init(void) static inline void da_monitor_destroy(void) { da_monitor_reset_all(); + da_monitor_sync_hook(); } #elif RV_MON_TYPE == RV_MON_PER_TASK @@ -367,6 +379,7 @@ static inline void da_monitor_destroy(void) tracepoint_synchronize_unregister(); da_monitor_reset_all(); + da_monitor_sync_hook(); rv_put_task_monitor_slot(task_mon_slot); task_mon_slot = RV_PER_TASK_MONITOR_INIT; @@ -573,13 +586,13 @@ static inline void da_monitor_destroy(void) int bkt; tracepoint_synchronize_unregister(); + da_monitor_reset_all(); + da_monitor_sync_hook(); /* * This function is called after all probes are disabled and no longer * pending, we can safely assume no concurrent user. */ - synchronize_rcu(); hash_for_each_safe(da_monitor_ht, bkt, tmp, mon_storage, node) { - da_monitor_reset_hook(&mon_storage->rv.da_mon); hash_del_rcu(&mon_storage->node); kfree(mon_storage); } diff --git a/include/rv/ha_monitor.h b/include/rv/ha_monitor.h index 4002b5247c46..28d3c74cabfc 100644 --- a/include/rv/ha_monitor.h +++ b/include/rv/ha_monitor.h @@ -37,6 +37,7 @@ static bool ha_monitor_handle_constraint(struct da_monitor *da_mon, #define da_monitor_event_hook ha_monitor_handle_constraint #define da_monitor_init_hook ha_monitor_init_env #define da_monitor_reset_hook ha_monitor_reset_env +#define da_monitor_sync_hook() synchronize_rcu() #if !defined(HA_SKIP_AUTO_CLEANUP) && RV_MON_TYPE == RV_MON_PER_TASK /* @@ -136,10 +137,13 @@ static enum hrtimer_restart ha_monitor_timer_callback(struct hrtimer *hrtimer); #define ha_get_ns() 0 #endif /* HA_CLK_NS */ +static bool ha_mon_destroying; + static int ha_monitor_init(void) { int ret; + WRITE_ONCE(ha_mon_destroying, false); ret = da_monitor_init(); if (ret == 0) ha_monitor_enable_hook(); @@ -148,6 +152,7 @@ static int ha_monitor_init(void) static void ha_monitor_destroy(void) { + WRITE_ONCE(ha_mon_destroying, true); ha_monitor_disable_hook(); da_monitor_destroy(); } @@ -288,12 +293,30 @@ static bool ha_monitor_handle_constraint(struct da_monitor *da_mon, return false; } +/* + * __ha_monitor_timer_callback - generic callback representation + * + * This callback runs in an RCU read-side critical section to allow the + * destruction sequence to easily synchronize_rcu() with all pending timers + * after asynchronously disabling them. The ha_mon_destroying check ensures + * any callback entering the RCU section after synchronize_rcu() completes + * will see the flag and bail out immediately. + */ static inline void __ha_monitor_timer_callback(struct ha_monitor *ha_mon) { - enum states curr_state = READ_ONCE(ha_mon->da_mon.curr_state); DECLARE_SEQ_BUF(env_string, ENV_BUFFER_SIZE); - u64 time_ns = ha_get_ns(); - + enum states curr_state; + u64 time_ns; + + guard(rcu)(); + if (unlikely(READ_ONCE(ha_mon_destroying))) + return; + /* Ensure consistent curr_state if we race with da_monitor_reset */ + curr_state = smp_load_acquire(&ha_mon->da_mon.curr_state); + if (unlikely(!da_monitor_handling_event(&ha_mon->da_mon))) + return; + + time_ns = ha_get_ns(); ha_get_env_string(&env_string, ha_mon, time_ns); ha_react(curr_state, EVENT_NONE, env_string.buffer); ha_trace_error_env(ha_mon, model_get_state_name(curr_state), -- cgit v1.2.3 From 7c147fae71f3b3542ba3292d2099ef7237cfc0da Mon Sep 17 00:00:00 2001 From: Gabriele Monaco Date: Mon, 1 Jun 2026 17:38:36 +0200 Subject: rv: Prevent task migration while handling per-CPU events Tracepoint handlers are fully preemptible after a46023d5616 ("tracing: Guard __DECLARE_TRACE() use of __DO_TRACE_CALL() with SRCU-fast"). When a per-CPU monitor handles an event, it retrieves the monitor state using a per-CPU pointer. If the event itself doesn't disable preemption, the task can migrate to a different CPU and we risk updating the wrong monitor. Mitigate this by explicitly disabling task migration before acquiring the monitor pointer. This cannot guarantee the monitor runs on the correct CPU but reduces the race condition window and prevents warnings. Reviewed-by: Wen Yang Reviewed-by: Nam Cao Link: https://lore.kernel.org/r/20260601153840.124372-10-gmonaco@redhat.com Signed-off-by: Gabriele Monaco --- include/rv/da_monitor.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/rv/da_monitor.h b/include/rv/da_monitor.h index 1f440c7818e6..34b8fba9ecd4 100644 --- a/include/rv/da_monitor.h +++ b/include/rv/da_monitor.h @@ -218,6 +218,10 @@ static inline void da_monitor_destroy(void) da_monitor_sync_hook(); } +#ifndef da_implicit_guard +#define da_implicit_guard() +#endif + #elif RV_MON_TYPE == RV_MON_PER_CPU /* * Functions to define, init and get a per-cpu monitor. @@ -284,6 +288,10 @@ static inline void da_monitor_destroy(void) da_monitor_sync_hook(); } +#ifndef da_implicit_guard +#define da_implicit_guard() guard(migrate)() +#endif + #elif RV_MON_TYPE == RV_MON_PER_TASK /* * Functions to define, init and get a per-task monitor. @@ -756,6 +764,7 @@ static inline bool __da_handle_start_run_event(struct da_monitor *da_mon, */ static inline void da_handle_event(enum events event) { + da_implicit_guard(); __da_handle_event(da_get_monitor(), event, 0); } @@ -771,6 +780,7 @@ static inline void da_handle_event(enum events event) */ static inline bool da_handle_start_event(enum events event) { + da_implicit_guard(); return __da_handle_start_event(da_get_monitor(), event, 0); } @@ -782,6 +792,7 @@ static inline bool da_handle_start_event(enum events event) */ static inline bool da_handle_start_run_event(enum events event) { + da_implicit_guard(); return __da_handle_start_run_event(da_get_monitor(), event, 0); } -- cgit v1.2.3 From dd214733544427587a95f66dbf3adff072568990 Mon Sep 17 00:00:00 2001 From: Michael Bommarito Date: Thu, 21 May 2026 10:45:17 -0400 Subject: Bluetooth: L2CAP: reject BR/EDR signaling packets over MTUsig net/bluetooth/l2cap_core.c:l2cap_sig_channel() accepts BR/EDR signaling packets up to the channel MTU and dispatches each command without enforcing the signaling MTU (MTUsig). A Bluetooth BR/EDR peer within radio range can send a fixed-channel CID 0x0001 packet that is larger than MTUsig and contains many L2CAP_ECHO_REQ commands before pairing. In a real-radio stock-kernel run, one 681-byte signaling packet containing 168 zero-length ECHO_REQ commands made the target transmit 168 ECHO_RSP frames over about 220 ms. Impact: a Bluetooth BR/EDR peer within radio range, before pairing, can force 168 ECHO_RSP frames from one 681-byte fixed-channel signaling packet containing packed ECHO_REQ commands. Define Linux's BR/EDR signaling MTU as the spec minimum of 48 bytes and reject any larger signaling packet with one L2CAP_COMMAND_REJECT_RSP carrying L2CAP_REJ_MTU_EXCEEDED before any command is dispatched. The Bluetooth Core spec wording for MTUExceeded says the reject identifier shall match the first request command in the packet, and that packets containing only responses shall be silently discarded. Linux intentionally deviates from that prescription: silently discarding desynchronizes the peer because the remote stack never learns its responses were dropped, and locating the first request command requires walking command headers past MTUsig, i.e. processing bytes from a packet we have already decided is too large to process. We therefore always emit one reject and use the identifier from the first command header, a single fixed-offset byte read. The unrestricted BR/EDR signaling parser and ECHO_REQ response path both trace to the initial git import; no later introducing commit is available for a Fixes tag. Cc: stable@vger.kernel.org Suggested-by: Luiz Augusto von Dentz Link: https://lore.kernel.org/r/20260518002800.1361430-1-michael.bommarito@gmail.com Link: https://lore.kernel.org/r/20260520135034.1060859-1-michael.bommarito@gmail.com Link: https://lore.kernel.org/r/20260521000555.3712030-1-michael.bommarito@gmail.com Assisted-by: Claude:claude-opus-4-7 Assisted-by: Codex:gpt-5-5-xhigh Signed-off-by: Michael Bommarito Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/l2cap.h | 1 + net/bluetooth/l2cap_core.c | 46 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 5172afee5494..e0a1f2293679 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -33,6 +33,7 @@ /* L2CAP defaults */ #define L2CAP_DEFAULT_MTU 672 #define L2CAP_DEFAULT_MIN_MTU 48 +#define L2CAP_SIG_MTU 48 /* BR/EDR signaling MTU */ #define L2CAP_DEFAULT_FLUSH_TO 0xFFFF #define L2CAP_EFS_DEFAULT_FLUSH_TO 0xFFFFFFFF #define L2CAP_DEFAULT_TX_WINDOW 63 diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 45b175399e8d..c4ccfbda9d78 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -5643,6 +5643,15 @@ static inline void l2cap_sig_send_rej(struct l2cap_conn *conn, u16 ident) l2cap_send_cmd(conn, ident, L2CAP_COMMAND_REJ, sizeof(rej), &rej); } +static inline void l2cap_sig_send_mtu_rej(struct l2cap_conn *conn, u8 ident) +{ + struct l2cap_cmd_rej_mtu rej; + + rej.reason = cpu_to_le16(L2CAP_REJ_MTU_EXCEEDED); + rej.max_mtu = cpu_to_le16(L2CAP_SIG_MTU); + l2cap_send_cmd(conn, ident, L2CAP_COMMAND_REJ, sizeof(rej), &rej); +} + static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *skb) { @@ -5655,6 +5664,43 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, if (hcon->type != ACL_LINK) goto drop; + /* + * Bluetooth Core v5.4, Vol 3, Part A, Section 4: the BR/EDR + * signaling channel has a fixed signaling MTU (MTUsig) whose + * minimum and default is 48 octets. Section 4.1 says that on + * an MTUExceeded command reject the identifier "shall match + * the first request command in the L2CAP packet" and that + * packets containing only response commands "shall be + * silently discarded". + * + * Linux intentionally deviates from that prescription: + * + * 1. Silently discarding desynchronizes the peer. The + * remote stack never learns its responses were dropped, + * so any state machine waiting on a paired response + * stalls until its own timer fires. + * + * 2. Locating "the first request command" requires walking + * command headers past MTUsig, i.e. processing bytes + * from a packet we have already decided is too large to + * process. + * + * Reject every over-MTUsig signaling packet with one + * L2CAP_REJ_MTU_EXCEEDED command reject. The reject's + * reason field is what tells the peer that the whole packet + * was discarded; the identifier value is informational, so + * we use the identifier from the first command header, a + * single fixed-offset byte read. + */ + if (skb->len > L2CAP_SIG_MTU) { + u8 ident = skb->data[1]; + + BT_DBG("signaling packet exceeds MTU: %u > %u", + skb->len, L2CAP_SIG_MTU); + l2cap_sig_send_mtu_rej(conn, ident); + goto drop; + } + while (skb->len >= L2CAP_CMD_HDR_SIZE) { u16 len; -- cgit v1.2.3 From 5e939544f9d2b4d5c052a07cfcde97de44263946 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 2 Jun 2026 22:14:17 +1000 Subject: mptcp: fix uninit-value in mptcp_established_options syzbot reported the following uninit splat: BUG: KMSAN: uninit-value in mptcp_write_data_fin net/mptcp/options.c:542 [inline] BUG: KMSAN: uninit-value in mptcp_established_options_dss net/mptcp/options.c:590 [inline] BUG: KMSAN: uninit-value in mptcp_established_options+0x112f/0x3530 net/mptcp/options.c:874 mptcp_write_data_fin net/mptcp/options.c:542 [inline] mptcp_established_options_dss net/mptcp/options.c:590 [inline] mptcp_established_options+0x112f/0x3530 net/mptcp/options.c:874 tcp_established_options+0x312/0xcc0 net/ipv4/tcp_output.c:1192 __tcp_transmit_skb+0x5dc/0x5fe0 net/ipv4/tcp_output.c:1575 __tcp_send_ack+0x967/0xad0 net/ipv4/tcp_output.c:4499 tcp_send_ack+0x3d/0x60 net/ipv4/tcp_output.c:4505 mptcp_subflow_shutdown+0x164/0x690 net/mptcp/protocol.c:3137 mptcp_check_send_data_fin+0x31b/0x3d0 net/mptcp/protocol.c:3218 __mptcp_wr_shutdown net/mptcp/protocol.c:3234 [inline] __mptcp_close+0x860/0x1360 net/mptcp/protocol.c:3313 mptcp_close+0x42/0x260 net/mptcp/protocol.c:3367 inet_release+0x1ee/0x2a0 net/ipv4/af_inet.c:442 __sock_release net/socket.c:722 [inline] sock_close+0xd6/0x2f0 net/socket.c:1514 __fput+0x60e/0x1010 fs/file_table.c:510 ____fput+0x25/0x30 fs/file_table.c:538 task_work_run+0x208/0x2b0 kernel/task_work.c:233 resume_user_mode_work include/linux/resume_user_mode.h:50 [inline] __exit_to_user_mode_loop kernel/entry/common.c:67 [inline] exit_to_user_mode_loop+0x306/0x1b60 kernel/entry/common.c:98 __exit_to_user_mode_prepare include/linux/irq-entry-common.h:207 [inline] syscall_exit_to_user_mode_prepare include/linux/irq-entry-common.h:238 [inline] syscall_exit_to_user_mode include/linux/entry-common.h:318 [inline] __do_fast_syscall_32+0x2c7/0x460 arch/x86/entry/syscall_32.c:310 do_fast_syscall_32+0x37/0x80 arch/x86/entry/syscall_32.c:332 do_SYSENTER_32+0x1f/0x30 arch/x86/entry/syscall_32.c:370 entry_SYSENTER_compat_after_hwframe+0x84/0x8e Local variable opts created at: __tcp_transmit_skb+0x4d/0x5fe0 net/ipv4/tcp_output.c:1536 __tcp_send_ack+0x967/0xad0 net/ipv4/tcp_output.c:4499 The output path currently omits initializing the mptcp extension `use_map` flag in a few corner cases. Address the issue always zeroing all the extensions flags before eventually initializing the individual bits. To that extent, introduce and use a struct_group to avoid multiple bitwise operations. Fixes: cfcceb7a39fc ("tcp: shrink per-packet memset in __tcp_transmit_skb()") Cc: stable@vger.kernel.org Reported-by: syzbot+ff020673c5e3d94d9478@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=ff020673c5e3d94d9478 Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20260602-net-mptcp-misc-fixes-7-1-rc7-v2-10-856831229976@kernel.org Signed-off-by: Jakub Kicinski --- include/net/mptcp.h | 7 +++++-- net/mptcp/options.c | 6 +++++- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/mptcp.h b/include/net/mptcp.h index f7263fe2a2e4..ee70f597a4de 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -27,7 +27,9 @@ struct mptcp_ext { u32 subflow_seq; u16 data_len; __sum16 csum; - u8 use_map:1, + + struct_group(flags, + u8 use_map:1, dsn64:1, data_fin:1, use_ack:1, @@ -35,9 +37,10 @@ struct mptcp_ext { mpc_map:1, frozen:1, reset_transient:1; - u8 reset_reason:4, + u8 reset_reason:4, csum_reqd:1, infinite_map:1; + ); /* end of flags group */ }; #define MPTCPOPT_HMAC_LEN 20 diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 51ca334678b4..f9f587203c35 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -572,6 +572,11 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, unsigned int ack_size; bool ret = false; + /* Zero `use_ack` and `use_map` flags with one shot. */ + BUILD_BUG_ON(sizeof_field(struct mptcp_ext, flags) != sizeof(u16)); + BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct mptcp_ext, flags), + sizeof(u16))); + *(u16 *)&opts->ext_copy.flags = 0; opts->csum_reqd = READ_ONCE(msk->csum_enabled); mpext = skb ? mptcp_get_ext(skb) : NULL; @@ -595,7 +600,6 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, /* passive sockets msk will set the 'can_ack' after accept(), even * if the first subflow may have the already the remote key handy */ - opts->ext_copy.use_ack = 0; if (!READ_ONCE(msk->can_ack)) { *size = ALIGN(dss_size, 4); return ret; -- cgit v1.2.3 From 0652a3daa78723f955b1ebeb621665ce72bec53e Mon Sep 17 00:00:00 2001 From: Eva Kurchatova Date: Wed, 3 Jun 2026 18:31:42 +0300 Subject: tracing: Fix CFI violation in probestub being called by tprobes The probestub is a function to allow tprobes to hook to a tracepoint to gain access to its parameters. The function itself is only referenced by the tracepoint structure which lives in the __tracepoint section. objtool explicitly ignores that section and when processing functions in the kernel, if it detects one that has no references it will seal it to have its ENDBR stripped on boot up. This means when a tprobe is attached to the sched_wakeup tracepoint, when it is triggered it will call __probestub_sched_wakeup and due to the missing ENDBR on a CFI-enabled machine it will take a #CP exception. Fix this by adding CFI_NOSEAL annotation to probestub declaration. Cc: stable@vger.kernel.org Acked-by: Masami Hiramatsu (Google) Link: https://patch.msgid.link/20260603153147.573589-1-eva.kurchatova@virtuozzo.com Fixes: d5173f753750 ("objtool: Exclude __tracepoints data from ENDBR checks") Signed-off-by: Eva Kurchatova [ Updated change log ] Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 763eea4d80d8..2d2b9f8cdda4 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -20,6 +20,7 @@ #include #include #include +#include struct module; struct tracepoint; @@ -389,6 +390,13 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) void __probestub_##_name(void *__data, proto) \ { \ } \ + /* \ + * Annotate the probestub 'CFI_NOSEAL' to stop objtool from \ + * requesting the kernel remove the ENDBR, because the only \ + * references to the function are in the __tracepoint section, \ + * that objtool doesn't scan. \ + */ \ + CFI_NOSEAL(__probestub_##_name); \ DEFINE_STATIC_CALL(tp_func_##_name, __traceiter_##_name); \ DEFINE_RUST_DO_TRACE(_name, TP_PROTO(proto), TP_ARGS(args)) -- cgit v1.2.3 From 899ee91156e57784090c5565e4f31bd7dbffbc5a Mon Sep 17 00:00:00 2001 From: Rajat Gupta Date: Sun, 31 May 2026 08:32:21 -0400 Subject: net/sched: fix pedit partial COW leading to page cache corruption MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit tcf_pedit_act() computes the COW range for skb_ensure_writable() once before the key loop using tcfp_off_max_hint, but the hint does not account for the runtime header offset added by typed keys. This can leave part of the write region un-COW'd. Fix by moving skb_ensure_writable() inside the per-key loop where the actual write offset is known, and add overflow checking on the offset arithmetic. For negative offsets (e.g. Ethernet header edits at ingress), use skb_cow() to COW the headroom instead. Guard offset_valid() against INT_MIN, where negation is undefined. Fixes: 8b796475fd78 ("net/sched: act_pedit: really ensure the skb is writable") Reported-by: Yiming Qian Reported-by: Keenan Dong Reported-by: Han Guidong <2045gemini@gmail.com> Reported-by: Zhang Cen Reviewed-by: Han Guidong <2045gemini@gmail.com> Tested-by: Han Guidong <2045gemini@gmail.com> Reviewed-by: Davide Caratti Tested-by: Davide Caratti Reviewed-by: Toke Høiland-Jørgensen Tested-by: Toke Høiland-Jørgensen Reviewed-by: Victor Nogueira Tested-by: Victor Nogueira Acked-by: Jamal Hadi Salim Signed-off-by: Rajat Gupta Link: https://patch.msgid.link/20260531123221.48732-1-jhs@mojatatu.com Signed-off-by: Jakub Kicinski --- include/net/tc_act/tc_pedit.h | 1 - net/sched/act_pedit.c | 77 +++++++++++++++++++++++-------------------- 2 files changed, 41 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h index f58ee15cd858..cb7b82f2cbc7 100644 --- a/include/net/tc_act/tc_pedit.h +++ b/include/net/tc_act/tc_pedit.h @@ -15,7 +15,6 @@ struct tcf_pedit_parms { struct tc_pedit_key *tcfp_keys; struct tcf_pedit_key_ex *tcfp_keys_ex; int action; - u32 tcfp_off_max_hint; unsigned char tcfp_nkeys; unsigned char tcfp_flags; struct rcu_head rcu; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index bc20f08a2789..bd3b1da3cd63 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -16,6 +16,8 @@ #include #include #include +#include +#include #include #include #include @@ -242,7 +244,6 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, goto out_free_ex; } - nparms->tcfp_off_max_hint = 0; nparms->tcfp_flags = parm->flags; nparms->tcfp_nkeys = parm->nkeys; @@ -268,14 +269,6 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, BITS_PER_TYPE(int) - 1, nparms->tcfp_keys[i].shift); - /* The AT option can read a single byte, we can bound the actual - * value with uchar max. - */ - cur += (0xff & offmask) >> nparms->tcfp_keys[i].shift; - - /* Each key touches 4 bytes starting from the computed offset */ - nparms->tcfp_off_max_hint = - max(nparms->tcfp_off_max_hint, cur + 4); } p = to_pedit(*a); @@ -318,15 +311,12 @@ static void tcf_pedit_cleanup(struct tc_action *a) call_rcu(&parms->rcu, tcf_pedit_cleanup_rcu); } -static bool offset_valid(struct sk_buff *skb, int offset) +static bool offset_valid(struct sk_buff *skb, int offset, int len) { - if (offset > 0 && offset > skb->len) - return false; - - if (offset < 0 && -offset > skb_headroom(skb)) + if (offset < -(int)skb_headroom(skb)) return false; - return true; + return offset <= (int)skb->len - len; } static int pedit_l4_skb_offset(struct sk_buff *skb, int *hoffset, const int header_type) @@ -393,18 +383,10 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb, struct tcf_pedit_key_ex *tkey_ex; struct tcf_pedit_parms *parms; struct tc_pedit_key *tkey; - u32 max_offset; int i; parms = rcu_dereference_bh(p->parms); - max_offset = (skb_transport_header_was_set(skb) ? - skb_transport_offset(skb) : - skb_network_offset(skb)) + - parms->tcfp_off_max_hint; - if (skb_ensure_writable(skb, min(skb->len, max_offset))) - goto done; - tcf_lastuse_update(&p->tcf_tm); tcf_action_update_bstats(&p->common, skb); @@ -412,10 +394,11 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb, tkey_ex = parms->tcfp_keys_ex; for (i = parms->tcfp_nkeys; i > 0; i--, tkey++) { + int write_offset, write_len; int offset = tkey->off; int hoffset = 0; - u32 *ptr, hdata; - u32 val; + u32 cur_val, val; + u32 *ptr; int rc; if (tkey_ex) { @@ -433,13 +416,15 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb, if (tkey->offmask) { u8 *d, _d; + int at_offset; - if (!offset_valid(skb, hoffset + tkey->at)) { + if (check_add_overflow(hoffset, (int)tkey->at, &at_offset) || + !offset_valid(skb, at_offset, sizeof(_d))) { pr_info_ratelimited("tc action pedit 'at' offset %d out of bounds\n", hoffset + tkey->at); goto bad; } - d = skb_header_pointer(skb, hoffset + tkey->at, + d = skb_header_pointer(skb, at_offset, sizeof(_d), &_d); if (!d) goto bad; @@ -451,31 +436,51 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb, } } - if (!offset_valid(skb, hoffset + offset)) { - pr_info_ratelimited("tc action pedit offset %d out of bounds\n", hoffset + offset); + if (check_add_overflow(hoffset, offset, &write_offset)) { + pr_info_ratelimited("tc action pedit offset overflow\n"); goto bad; } - ptr = skb_header_pointer(skb, hoffset + offset, - sizeof(hdata), &hdata); - if (!ptr) + if (!offset_valid(skb, write_offset, sizeof(*ptr))) { + pr_info_ratelimited("tc action pedit offset %d out of bounds\n", + write_offset); goto bad; + } + + if (write_offset < 0) { + if (skb_cow(skb, -write_offset)) + goto bad; + if (write_offset + (int)sizeof(*ptr) > 0) { + if (skb_ensure_writable(skb, + min_t(int, skb->len, + write_offset + (int)sizeof(*ptr)))) + goto bad; + } + } else { + if (check_add_overflow(write_offset, (int)sizeof(*ptr), + &write_len)) + goto bad; + if (skb_ensure_writable(skb, min_t(int, skb->len, + write_len))) + goto bad; + } + + ptr = (u32 *)(skb->data + write_offset); + cur_val = get_unaligned(ptr); /* just do it, baby */ switch (cmd) { case TCA_PEDIT_KEY_EX_CMD_SET: val = tkey->val; break; case TCA_PEDIT_KEY_EX_CMD_ADD: - val = (*ptr + tkey->val) & ~tkey->mask; + val = (cur_val + tkey->val) & ~tkey->mask; break; default: pr_info_ratelimited("tc action pedit bad command (%d)\n", cmd); goto bad; } - *ptr = ((*ptr & tkey->mask) ^ val); - if (ptr == &hdata) - skb_store_bits(skb, hoffset + offset, ptr, 4); + put_unaligned((cur_val & tkey->mask) ^ val, ptr); } goto done; -- cgit v1.2.3 From 979c294509f9248fe1e7c358d582fb37dd5ca12d Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 4 Jun 2026 17:33:21 -0700 Subject: cfi: Include uaccess.h for get_kernel_nofault() After commit 0652a3daa787 ("tracing: Fix CFI violation in probestub being called by tprobes"), there are many build errors when building ARCH=arm multi_v7_defconfig + CONFIG_CFI=y like: In file included from drivers/base/devres.c:17: In file included from drivers/base/trace.h:16: In file included from include/linux/tracepoint.h:23: include/linux/cfi.h:44:6: error: call to undeclared function 'get_kernel_nofault'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] 44 | if (get_kernel_nofault(hash, func - cfi_get_offset())) | ^ 1 error generated. get_kernel_nofault() is called in the generic version of cfi_get_func_hash() but nothing ensures uaccess.h is always included for a proper expansion and prototype. Include uaccess.h in cfi.h to clear up the errors. Cc: stable@vger.kernel.org Fixes: 0652a3daa787 ("tracing: Fix CFI violation in probestub being called by tprobes") Signed-off-by: Nathan Chancellor Acked-by: Masami Hiramatsu (Google) Reviewed-by: Sami Tolvanen Signed-off-by: Linus Torvalds --- include/linux/cfi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/cfi.h b/include/linux/cfi.h index 1fd22ea6eba4..0f220d29225c 100644 --- a/include/linux/cfi.h +++ b/include/linux/cfi.h @@ -9,6 +9,7 @@ #include #include +#include #include #ifdef CONFIG_CFI -- cgit v1.2.3 From badad6fad60def1b9805559dd81dbab3d97b82aa Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Thu, 4 Jun 2026 15:03:13 -0300 Subject: RDMA: During rereg_mr ensure that REREG_ACCESS is compatible If IB_MR_REREG_ACCESS changes from RO to RW then the umem has to be re-evaluated to ensure it is properly pinned as RW. Since the umem is hidden inside each driver's mr struct add a ib_umem_check_rereg() function that each driver has to call before processing IB_MR_REREG_ACCESS. mlx4 has to retain its duplicate ib_access_writable check because it implements IB_MR_REREG_ACCESS | IB_MR_REREG_TRANS by changing both items in place sequentially while the MR is live, so it will continue to not support this combination. Cc: stable@vger.kernel.org Fixes: b40656aa7d55 ("RDMA/umem: remove FOLL_FORCE usage") Link: https://patch.msgid.link/r/0-v1-06fb1a2d6cf5+107-rereg_access_jgg@nvidia.com Reported-by: Philip Tsukerman Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/umem.c | 16 ++++++++++++++++ drivers/infiniband/hw/hns/hns_roce_mr.c | 4 ++++ drivers/infiniband/hw/irdma/verbs.c | 4 ++++ drivers/infiniband/hw/mlx4/mr.c | 4 ++++ drivers/infiniband/hw/mlx5/mr.c | 4 ++++ drivers/infiniband/sw/rxe/rxe_verbs.c | 5 +++++ include/rdma/ib_umem.h | 8 ++++++++ 7 files changed, 45 insertions(+) (limited to 'include') diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 786fa1aa8e55..4b055712b0d0 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -332,3 +332,19 @@ int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset, return 0; } EXPORT_SYMBOL(ib_umem_copy_from); + +/* + * Called during rereg mr if the driver is able to re-use a umem for + * IB_MR_REREG_ACCESS. + */ +int ib_umem_check_rereg(struct ib_umem *umem, int flags, int new_access_flags) +{ + if (!umem) + return 0; + + if ((flags & IB_MR_REREG_ACCESS) && !(flags & IB_MR_REREG_TRANS)) + if (ib_access_writable(new_access_flags) && !umem->writable) + return -EACCES; + return 0; +} +EXPORT_SYMBOL(ib_umem_check_rereg); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 896af1828a38..25bfd3970f5b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -300,6 +300,10 @@ struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, goto err_out; } + ret = ib_umem_check_rereg(mr->pbl_mtr.umem, flags, mr_access_flags); + if (ret) + goto err_out; + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); ret = PTR_ERR_OR_ZERO(mailbox); if (ret) diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 17086048d2d7..8cd427532805 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -3803,6 +3803,10 @@ static struct ib_mr *irdma_rereg_user_mr(struct ib_mr *ib_mr, int flags, if (flags & ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) return ERR_PTR(-EOPNOTSUPP); + ret = ib_umem_check_rereg(iwmr->region, flags, new_access); + if (ret) + return ERR_PTR(ret); + if (dmabuf_revocable) { umem_dmabuf = to_ib_umem_dmabuf(iwmr->region); diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index 650b4a9121ff..6747bca30677 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -209,6 +209,10 @@ struct ib_mr *mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, struct mlx4_mpt_entry **pmpt_entry = &mpt_entry; int err; + err = ib_umem_check_rereg(mmr->umem, flags, mr_access_flags); + if (err) + return ERR_PTR(err); + /* Since we synchronize this call and mlx4_ib_dereg_mr via uverbs, * we assume that the calls can't run concurrently. Otherwise, a * race exists. diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 3b6da45061a5..fb40b44496f4 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1179,6 +1179,10 @@ struct ib_mr *mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, if (flags & ~(IB_MR_REREG_TRANS | IB_MR_REREG_PD | IB_MR_REREG_ACCESS)) return ERR_PTR(-EOPNOTSUPP); + err = ib_umem_check_rereg(mr->umem, flags, new_access_flags); + if (err) + return ERR_PTR(err); + if (!(flags & IB_MR_REREG_ACCESS)) new_access_flags = mr->access_flags; if (!(flags & IB_MR_REREG_PD)) diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 4d4891dc2884..4cf04a44189c 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -1319,6 +1319,7 @@ static struct ib_mr *rxe_rereg_user_mr(struct ib_mr *ibmr, int flags, struct rxe_mr *mr = to_rmr(ibmr); struct rxe_pd *old_pd = to_rpd(ibmr->pd); struct rxe_pd *pd = to_rpd(ibpd); + int err; /* for now only support the two easy cases: * rereg_pd and rereg_access @@ -1328,6 +1329,10 @@ static struct ib_mr *rxe_rereg_user_mr(struct ib_mr *ibmr, int flags, return ERR_PTR(-EOPNOTSUPP); } + err = ib_umem_check_rereg(mr->umem, flags, access); + if (err) + return ERR_PTR(err); + if (flags & IB_MR_REREG_PD) { rxe_put(old_pd); rxe_get(pd); diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 2ad52cc1d52b..49172098a8de 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -156,6 +156,8 @@ void ib_umem_dmabuf_revoke_lock(struct ib_umem_dmabuf *umem_dmabuf); void ib_umem_dmabuf_revoke_unlock(struct ib_umem_dmabuf *umem_dmabuf); void ib_umem_dmabuf_revoke(struct ib_umem_dmabuf *umem_dmabuf); +int ib_umem_check_rereg(struct ib_umem *umem, int flags, int new_access_flags); + #else /* CONFIG_INFINIBAND_USER_MEM */ #include @@ -230,5 +232,11 @@ static inline void ib_umem_dmabuf_revoke_lock(struct ib_umem_dmabuf *umem_dmabuf static inline void ib_umem_dmabuf_revoke_unlock(struct ib_umem_dmabuf *umem_dmabuf) {} static inline void ib_umem_dmabuf_revoke(struct ib_umem_dmabuf *umem_dmabuf) {} +static inline int ib_umem_check_rereg(struct ib_umem *umem, int flags, + int new_access_flags) +{ + return -EOPNOTSUPP; +} + #endif /* CONFIG_INFINIBAND_USER_MEM */ #endif /* IB_UMEM_H */ -- cgit v1.2.3