From 8a9ebe8c3ca4c5bdad8f010656f4c2155da589fd Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 28 Feb 2026 17:48:22 -0800 Subject: gpio: timberdale: repair kernel-doc comments Use a ':' after struct member names to avoid kernel-doc warnings: Warning: include/linux/timb_gpio.h:22 struct member 'gpio_base' not described in 'timbgpio_platform_data' Warning: include/linux/timb_gpio.h:22 struct member 'nr_pins' not described in 'timbgpio_platform_data' Warning: include/linux/timb_gpio.h:22 struct member 'irq_base' not described in 'timbgpio_platform_data' Signed-off-by: Randy Dunlap Link: https://patch.msgid.link/20260301014822.3133268-1-rdunlap@infradead.org Signed-off-by: Bartosz Golaszewski --- include/linux/timb_gpio.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/timb_gpio.h b/include/linux/timb_gpio.h index 3faf5a6bb13e..74f5e73bf6db 100644 --- a/include/linux/timb_gpio.h +++ b/include/linux/timb_gpio.h @@ -9,10 +9,10 @@ /** * struct timbgpio_platform_data - Platform data of the Timberdale GPIO driver - * @gpio_base The number of the first GPIO pin, set to -1 for + * @gpio_base: The number of the first GPIO pin, set to -1 for * dynamic number allocation. - * @nr_pins Number of pins that is supported by the hardware (1-32) - * @irq_base If IRQ is supported by the hardware, this is the base + * @nr_pins: Number of pins that is supported by the hardware (1-32) + * @irq_base: If IRQ is supported by the hardware, this is the base * number of IRQ:s. One IRQ per pin will be used. Set to * -1 if IRQ:s is not supported. */ -- cgit v1.2.3 From 189645ba9cd9c1eed45151aacaae4347c1eb86a7 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 28 Feb 2026 17:48:11 -0800 Subject: gpio: nomadik: repair some kernel-doc comments Avoid these kernel-doc warnings by: - adding short descriptions for enums - using correct (matching) struct names in kernel-doc short descriptions - using the correct struct member name for @nfunctions Warning: include/linux/gpio/gpio-nomadik.h:116 missing initial short description on line: * enum prcm_gpiocr_reg_index Warning: include/linux/gpio/gpio-nomadik.h:125 missing initial short description on line: * enum prcm_gpiocr_altcx_index Warning: include/linux/gpio/gpio-nomadik.h:146 expecting prototype for struct prcm_gpio_altcx. Prototype was for struct prcm_gpiocr_altcx instead Warning: include/linux/gpio/gpio-nomadik.h:156 expecting prototype for struct prcm_gpio_altcx_pin_desc. Prototype was for struct prcm_gpiocr_altcx_pin_desc instead Warning: include/linux/gpio/gpio-nomadik.h:212 struct member 'nfunctions' not described in 'nmk_pinctrl_soc_data' Signed-off-by: Randy Dunlap Link: https://patch.msgid.link/20260301014811.3133250-1-rdunlap@infradead.org Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/gpio-nomadik.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/gpio/gpio-nomadik.h b/include/linux/gpio/gpio-nomadik.h index 592a774a53cd..8061b9826361 100644 --- a/include/linux/gpio/gpio-nomadik.h +++ b/include/linux/gpio/gpio-nomadik.h @@ -114,8 +114,7 @@ struct nmk_gpio_chip { } /** - * enum prcm_gpiocr_reg_index - * Used to reference an PRCM GPIOCR register address. + * enum prcm_gpiocr_reg_index - Used to reference a PRCM GPIOCR register address. 
*/ enum prcm_gpiocr_reg_index { PRCM_IDX_GPIOCR1, @@ -123,8 +122,7 @@ enum prcm_gpiocr_reg_index { PRCM_IDX_GPIOCR3 }; /** - * enum prcm_gpiocr_altcx_index - * Used to reference an Other alternate-C function. + * enum prcm_gpiocr_altcx_index - Used to reference an Other alternate-C function. */ enum prcm_gpiocr_altcx_index { PRCM_IDX_GPIOCR_ALTC1, @@ -135,7 +133,7 @@ enum prcm_gpiocr_altcx_index { }; /** - * struct prcm_gpio_altcx - Other alternate-C function + * struct prcm_gpiocr_altcx - Other alternate-C function * @used: other alternate-C function availability * @reg_index: PRCM GPIOCR register index used to control the function * @control_bit: PRCM GPIOCR bit used to control the function @@ -147,7 +145,7 @@ struct prcm_gpiocr_altcx { } __packed; /** - * struct prcm_gpio_altcx_pin_desc - Other alternate-C pin + * struct prcm_gpiocr_altcx_pin_desc - Other alternate-C pin * @pin: The pin number * @altcx: array of other alternate-C[1-4] functions */ @@ -193,7 +191,7 @@ struct nmk_pingroup { * numbering. * @npins: The number of entries in @pins. * @functions: The functions supported on this SoC. - * @nfunction: The number of entries in @functions. + * @nfunctions: The number of entries in @functions. * @groups: An array describing all pin groups the pin SoC supports. * @ngroups: The number of entries in @groups. * @altcx_pins: The pins that support Other alternate-C function on this SoC -- cgit v1.2.3 From 56bd57e7b161f75535df91b229b0b2c64c6e5581 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Sat, 28 Feb 2026 14:02:22 -0600 Subject: iio: add IIO_DECLARE_QUATERNION() macro Add a new IIO_DECLARE_QUATERNION() macro that is used to declare the field in an IIO buffer struct that contains a quaternion vector. Quaternions are currently the only IIO data type that uses the .repeat feature of struct iio_scan_type. This has an implicit rule that the element in the buffer must be aligned to the entire size of the repeated element. This macro will make that requirement explicit. Since this is the only user, we just call the macro IIO_DECLARE_QUATERNION() instead of something more generic. Signed-off-by: David Lechner Reviewed-by: Andy Shevchenko Cc: Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index a9ecff191bd9..2c91b7659ce9 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -931,6 +931,18 @@ static inline void *iio_device_get_drvdata(const struct iio_dev *indio_dev) #define IIO_DECLARE_DMA_BUFFER_WITH_TS(type, name, count) \ __IIO_DECLARE_BUFFER_WITH_TS(type, name, count) __aligned(IIO_DMA_MINALIGN) +/** + * IIO_DECLARE_QUATERNION() - Declare a quaternion element + * @type: element type of the individual vectors + * @name: identifier name + * + * Quaternions are a vector composed of 4 elements (W, X, Y, Z). Use this macro + * to declare a quaternion element in a struct to ensure proper alignment in + * an IIO buffer. 
+ */ +#define IIO_DECLARE_QUATERNION(type, name) \ + type name[4] __aligned(sizeof(type) * 4) + struct iio_dev *iio_device_alloc(struct device *parent, int sizeof_priv); /* The information at the returned address is guaranteed to be cacheline aligned */ -- cgit v1.2.3 From 1b164b876c36c3eb5561dd9b37702b04401b0166 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 24 Mar 2026 10:21:25 -1000 Subject: cgroup: Wait for dying tasks to leave on rmdir a72f73c4dd9b ("cgroup: Don't expose dead tasks in cgroup") hid PF_EXITING tasks from cgroup.procs so that systemd doesn't see tasks that have already been reaped via waitpid(). However, the populated counter (nr_populated_csets) is only decremented when the task later passes through cgroup_task_dead() in finish_task_switch(). This means cgroup.procs can appear empty while the cgroup is still populated, causing rmdir to fail with -EBUSY. Fix this by making cgroup_rmdir() wait for dying tasks to fully leave. If the cgroup is populated but all remaining tasks have PF_EXITING set (the task iterator returns none due to the existing filter), wait for a kick from cgroup_task_dead() and retry. The wait is brief as tasks are removed from the cgroup's css_set between PF_EXITING assertion in do_exit() and cgroup_task_dead() in finish_task_switch(). v2: cgroup_is_populated() true to false transition happens under css_set_lock not cgroup_mutex, so retest under css_set_lock before sleeping to avoid missed wakeups (Sebastian). Fixes: a72f73c4dd9b ("cgroup: Don't expose dead tasks in cgroup") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202603222104.2c81684e-lkp@intel.com Reported-by: Sebastian Andrzej Siewior Signed-off-by: Tejun Heo Reviewed-by: Sebastian Andrzej Siewior Cc: Bert Karwatzki Cc: Michal Koutny Cc: cgroups@vger.kernel.org --- include/linux/cgroup-defs.h | 3 ++ kernel/cgroup/cgroup.c | 86 +++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 86 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index bb92f5c169ca..7f87399938fa 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -609,6 +609,9 @@ struct cgroup { /* used to wait for offlining of csses */ wait_queue_head_t offline_waitq; + /* used by cgroup_rmdir() to wait for dying tasks to leave */ + wait_queue_head_t dying_populated_waitq; + /* used to schedule release agent */ struct work_struct release_agent_work; diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 01fc2a93f3ef..2163054e1aa6 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2126,6 +2126,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) #endif init_waitqueue_head(&cgrp->offline_waitq); + init_waitqueue_head(&cgrp->dying_populated_waitq); INIT_WORK(&cgrp->release_agent_work, cgroup1_release_agent); } @@ -6224,6 +6225,76 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) return 0; }; +/** + * cgroup_drain_dying - wait for dying tasks to leave before rmdir + * @cgrp: the cgroup being removed + * + * The PF_EXITING filter in css_task_iter_advance() hides exiting tasks from + * cgroup.procs so that userspace (e.g. systemd) doesn't see tasks that have + * already been reaped via waitpid(). However, the populated counter + * (nr_populated_csets) is only decremented when the task later passes through + * cgroup_task_dead() in finish_task_switch(). 
This creates a window where + * cgroup.procs appears empty but cgroup_is_populated() is still true, causing + * rmdir to fail with -EBUSY. + * + * This function bridges that gap. If the cgroup is populated but all remaining + * tasks have PF_EXITING set, we wait for cgroup_task_dead() to process them. + * Tasks are removed from the cgroup's css_set in cgroup_task_dead() called from + * finish_task_switch(). As the window between PF_EXITING and cgroup_task_dead() + * is short, the number of PF_EXITING tasks on the list is small and the wait + * is brief. + * + * Each cgroup_task_dead() kicks the waitqueue via cset->cgrp_links, and we + * retry the full check from scratch. + * + * Must be called with cgroup_mutex held. + */ +static int cgroup_drain_dying(struct cgroup *cgrp) + __releases(&cgroup_mutex) __acquires(&cgroup_mutex) +{ + struct css_task_iter it; + struct task_struct *task; + DEFINE_WAIT(wait); + + lockdep_assert_held(&cgroup_mutex); +retry: + if (!cgroup_is_populated(cgrp)) + return 0; + + /* Same iterator as cgroup.threads - if any task is visible, it's busy */ + css_task_iter_start(&cgrp->self, 0, &it); + task = css_task_iter_next(&it); + css_task_iter_end(&it); + + if (task) + return -EBUSY; + + /* + * All remaining tasks are PF_EXITING and will pass through + * cgroup_task_dead() shortly. Wait for a kick and retry. + * + * cgroup_is_populated() can't transition from false to true while + * we're holding cgroup_mutex, but the true to false transition + * happens under css_set_lock (via cgroup_task_dead()). We must + * retest and prepare_to_wait() under css_set_lock. Otherwise, the + * transition can happen between our first test and + * prepare_to_wait(), and we sleep with no one to wake us. + */ + spin_lock_irq(&css_set_lock); + if (!cgroup_is_populated(cgrp)) { + spin_unlock_irq(&css_set_lock); + return 0; + } + prepare_to_wait(&cgrp->dying_populated_waitq, &wait, + TASK_UNINTERRUPTIBLE); + spin_unlock_irq(&css_set_lock); + mutex_unlock(&cgroup_mutex); + schedule(); + finish_wait(&cgrp->dying_populated_waitq, &wait); + mutex_lock(&cgroup_mutex); + goto retry; +} + int cgroup_rmdir(struct kernfs_node *kn) { struct cgroup *cgrp; @@ -6233,9 +6304,12 @@ int cgroup_rmdir(struct kernfs_node *kn) if (!cgrp) return 0; - ret = cgroup_destroy_locked(cgrp); - if (!ret) - TRACE_CGROUP_PATH(rmdir, cgrp); + ret = cgroup_drain_dying(cgrp); + if (!ret) { + ret = cgroup_destroy_locked(cgrp); + if (!ret) + TRACE_CGROUP_PATH(rmdir, cgrp); + } cgroup_kn_unlock(kn); return ret; @@ -6995,6 +7069,7 @@ void cgroup_task_exit(struct task_struct *tsk) static void do_cgroup_task_dead(struct task_struct *tsk) { + struct cgrp_cset_link *link; struct css_set *cset; unsigned long flags; @@ -7008,6 +7083,11 @@ static void do_cgroup_task_dead(struct task_struct *tsk) if (thread_group_leader(tsk) && atomic_read(&tsk->signal->live)) list_add_tail(&tsk->cg_list, &cset->dying_tasks); + /* kick cgroup_drain_dying() waiters, see cgroup_rmdir() */ + list_for_each_entry(link, &cset->cgrp_links, cgrp_link) + if (waitqueue_active(&link->cgrp->dying_populated_waitq)) + wake_up(&link->cgrp->dying_populated_waitq); + if (dl_task(tsk)) dec_dl_tasks_cs(tsk); -- cgit v1.2.3 From 629ec78ef8608d955ce217880cdc3e1873af3a15 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 24 Mar 2026 00:25:57 +0100 Subject: mpls: add seqcount to protect the platform_label{,s} pair The RCU-protected codepaths (mpls_forward, mpls_dump_routes) can have an inconsistent view of platform_labels vs platform_label in case of a concurrent resize 
(resize_platform_label_table, under platform_mutex). This can lead to OOB accesses. This patch adds a seqcount, so that we get a consistent snapshot. Note that mpls_label_ok is also susceptible to this, so the check against RTA_DST in rtm_to_route_config, done outside platform_mutex, is not sufficient. This value gets passed to mpls_label_ok once more in both mpls_route_add and mpls_route_del, so there is no issue, but that additional check must not be removed. Reported-by: Yuan Tan Reported-by: Yifan Wu Reported-by: Juefei Pu Reported-by: Xin Liu Fixes: 7720c01f3f590 ("mpls: Add a sysctl to control the size of the mpls label table") Fixes: dde1b38e873c ("mpls: Convert mpls_dump_routes() to RCU.") Signed-off-by: Sabrina Dubroca Link: https://patch.msgid.link/cd8fca15e3eb7e212b094064cd83652e20fd9d31.1774284088.git.sd@queasysnail.net Signed-off-by: Jakub Kicinski --- include/net/netns/mpls.h | 1 + net/mpls/af_mpls.c | 29 +++++++++++++++++++++++++---- 2 files changed, 26 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/netns/mpls.h b/include/net/netns/mpls.h index 6682e51513ef..2073cbac2afb 100644 --- a/include/net/netns/mpls.h +++ b/include/net/netns/mpls.h @@ -17,6 +17,7 @@ struct netns_mpls { size_t platform_labels; struct mpls_route __rcu * __rcu *platform_label; struct mutex platform_mutex; + seqcount_mutex_t platform_label_seq; struct ctl_table_header *ctl; }; diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index d5417688f69e..18d3da8ab384 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -83,14 +83,30 @@ static struct mpls_route *mpls_route_input(struct net *net, unsigned int index) return mpls_dereference(net, platform_label[index]); } +static struct mpls_route __rcu **mpls_platform_label_rcu(struct net *net, size_t *platform_labels) +{ + struct mpls_route __rcu **platform_label; + unsigned int sequence; + + do { + sequence = read_seqcount_begin(&net->mpls.platform_label_seq); + platform_label = rcu_dereference(net->mpls.platform_label); + *platform_labels = net->mpls.platform_labels; + } while (read_seqcount_retry(&net->mpls.platform_label_seq, sequence)); + + return platform_label; +} + static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned int index) { struct mpls_route __rcu **platform_label; + size_t platform_labels; + + platform_label = mpls_platform_label_rcu(net, &platform_labels); - if (index >= net->mpls.platform_labels) + if (index >= platform_labels) return NULL; - platform_label = rcu_dereference(net->mpls.platform_label); return rcu_dereference(platform_label[index]); } @@ -2240,8 +2256,7 @@ static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb) if (index < MPLS_LABEL_FIRST_UNRESERVED) index = MPLS_LABEL_FIRST_UNRESERVED; - platform_label = rcu_dereference(net->mpls.platform_label); - platform_labels = net->mpls.platform_labels; + platform_label = mpls_platform_label_rcu(net, &platform_labels); if (filter.filter_set) flags |= NLM_F_DUMP_FILTERED; @@ -2645,8 +2660,12 @@ static int resize_platform_label_table(struct net *net, size_t limit) } /* Update the global pointers */ + local_bh_disable(); + write_seqcount_begin(&net->mpls.platform_label_seq); net->mpls.platform_labels = limit; rcu_assign_pointer(net->mpls.platform_label, labels); + write_seqcount_end(&net->mpls.platform_label_seq); + local_bh_enable(); mutex_unlock(&net->mpls.platform_mutex); @@ -2728,6 +2747,8 @@ static __net_init int mpls_net_init(struct net *net) int i; mutex_init(&net->mpls.platform_mutex); + 
seqcount_mutex_init(&net->mpls.platform_label_seq, &net->mpls.platform_mutex); + net->mpls.platform_labels = 0; net->mpls.platform_label = NULL; net->mpls.ip_ttl_propagate = 1; -- cgit v1.2.3 From 57a04a13aac1f247d171c3f3aef93efc69e6979e Mon Sep 17 00:00:00 2001 From: Qingfang Deng Date: Tue, 24 Mar 2026 22:08:56 +0800 Subject: netdevsim: fix build if SKB_EXTENSIONS=n __skb_ext_put() is not declared if SKB_EXTENSIONS is not enabled, which causes a build error: drivers/net/netdevsim/netdev.c: In function 'nsim_forward_skb': drivers/net/netdevsim/netdev.c:114:25: error: implicit declaration of function '__skb_ext_put'; did you mean 'skb_ext_put'? [-Werror=implicit-function-declaration] 114 | __skb_ext_put(psp_ext); | ^~~~~~~~~~~~~ | skb_ext_put cc1: some warnings being treated as errors Add a stub to fix the build. Fixes: 7d9351435ebb ("netdevsim: drop PSP ext ref on forward failure") Signed-off-by: Qingfang Deng Link: https://patch.msgid.link/20260324140857.783-1-dqfext@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index daa4e4944ce3..2f278ce376b7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -5097,6 +5097,7 @@ static inline bool skb_has_extensions(struct sk_buff *skb) return unlikely(skb->active_extensions); } #else +static inline void __skb_ext_put(struct skb_ext *ext) {} static inline void skb_ext_put(struct sk_buff *skb) {} static inline void skb_ext_reset(struct sk_buff *skb) {} static inline void skb_ext_del(struct sk_buff *skb, int unused) {} -- cgit v1.2.3 From 90c5def10bea574b101b7a520c015ca81742183f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 2 Mar 2026 18:22:52 -0400 Subject: iommu: Do not call drivers for empty gathers An empty gather is coded with start=U64_MAX, end=0 and several drivers go on to convert that to a size with: end - start + 1 Which gives 2 for an empty gather. This then causes Weird Stuff to happen (for example a UBSAN splat in VT-d) that is hopefully harmless, but maybe not. Prevent drivers from being called right in iommu_iotlb_sync(). Auditing shows that AMD, Intel, Mediatek and RISC-V drivers all do things on these empty gathers. Further, there are several callers that can trigger empty gathers, especially in unusual conditions. For example iommu_map_nosync() will call a 0-size unmap on some error paths. Also in VFIO, iommupt and other places.
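A minimal userspace sketch of the arithmetic (illustrative only, not part of the patch; the variable names are hypothetical):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		/* Sentinel values of an empty gather, as set by
		 * iommu_iotlb_gather_init(): start = U64_MAX, end = 0. */
		uint64_t start = UINT64_MAX;
		uint64_t end = 0;

		/* The common driver idiom for the flush size. The unsigned
		 * subtraction wraps around, yielding 2 rather than 0. */
		uint64_t size = end - start + 1;

		printf("size = %llu\n", (unsigned long long)size); /* size = 2 */
		return 0;
	}
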
Cc: stable@vger.kernel.org Reported-by: Janusz Krzysztofik Closes: https://lore.kernel.org/r/11145826.aFP6jjVeTY@jkrzyszt-mobl2.ger.corp.intel.com Signed-off-by: Jason Gunthorpe Reviewed-by: Lu Baolu Reviewed-by: Samiullah Khawaja Reviewed-by: Robin Murphy Reviewed-by: Vasant Hegde Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 54b8b48c762e..555597b54083 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -980,7 +980,8 @@ static inline void iommu_flush_iotlb_all(struct iommu_domain *domain) static inline void iommu_iotlb_sync(struct iommu_domain *domain, struct iommu_iotlb_gather *iotlb_gather) { - if (domain->ops->iotlb_sync) + if (domain->ops->iotlb_sync && + likely(iotlb_gather->start < iotlb_gather->end)) domain->ops->iotlb_sync(domain, iotlb_gather); iommu_iotlb_gather_init(iotlb_gather); -- cgit v1.2.3 From fffca572f9ca51607f180a37d0c898404c8f9112 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 26 Mar 2026 15:06:31 +0100 Subject: mpage: Provide variant of mpage_writepages() with own optional folio handler Some filesystems need to treat some folios specially (for example for inodes with inline data). Doing the handling in their .writepages method in a race-free manner results in duplicating some of the writeback internals. So provide a generalized version of mpage_writepages() that allows the filesystem to provide a handler called for each folio which can handle the folio in a special way. Reviewed-by: Christoph Hellwig Link: https://patch.msgid.link/20260326140635.15895-3-jack@suse.cz Signed-off-by: Jan Kara --- fs/mpage.c | 30 ++++++++++++++++++++++++------ include/linux/mpage.h | 11 +++++++++-- 2 files changed, 33 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/fs/mpage.c b/fs/mpage.c index 7dae5afc2b9e..b3d9f231a04a 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -646,17 +646,24 @@ out: } /** - * mpage_writepages - walk the list of dirty pages of the given address space & writepage() all of them + * __mpage_writepages - walk the list of dirty pages of the given address space + * & writepage() all of them * @mapping: address space structure to write * @wbc: subtract the number of written pages from *@wbc->nr_to_write * @get_block: the filesystem's block mapper function. * @write_folio: handler to call for each folio before calling * mpage_write_folio() * * This is a library function, which implements the writepages() - * address_space_operation. + * address_space_operation. It calls @write_folio handler for each folio. If + * the handler returns value > 0, it calls mpage_write_folio() to do the + * folio writeback. */ int -mpage_writepages(struct address_space *mapping, - struct writeback_control *wbc, get_block_t get_block) +__mpage_writepages(struct address_space *mapping, + struct writeback_control *wbc, get_block_t get_block, + int (*write_folio)(struct folio *folio, + struct writeback_control *wbc)) { struct mpage_data mpd = { .get_block = get_block, @@ -666,11 +673,22 @@ mpage_writepages(struct address_space *mapping, int error; blk_start_plug(&plug); - while ((folio = writeback_iter(mapping, wbc, folio, &error))) + while ((folio = writeback_iter(mapping, wbc, folio, &error))) { + if (write_folio) { + error = write_folio(folio, wbc); + /* + * == 0 means folio is handled, < 0 means error.
In + * both cases hand back control to writeback_iter() + */ + if (error <= 0) + continue; + /* Let mpage_write_folio() handle the folio. */ + } error = mpage_write_folio(wbc, folio, &mpd); + } if (mpd.bio) mpage_bio_submit_write(mpd.bio); blk_finish_plug(&plug); return error; } -EXPORT_SYMBOL(mpage_writepages); +EXPORT_SYMBOL(__mpage_writepages); diff --git a/include/linux/mpage.h b/include/linux/mpage.h index 1bdc39daac0a..358946990bfa 100644 --- a/include/linux/mpage.h +++ b/include/linux/mpage.h @@ -17,7 +17,14 @@ struct readahead_control; void mpage_readahead(struct readahead_control *, get_block_t get_block); int mpage_read_folio(struct folio *folio, get_block_t get_block); -int mpage_writepages(struct address_space *mapping, - struct writeback_control *wbc, get_block_t get_block); +int __mpage_writepages(struct address_space *mapping, + struct writeback_control *wbc, get_block_t get_block, + int (*write_folio)(struct folio *folio, + struct writeback_control *wbc)); +static inline int mpage_writepages(struct address_space *mapping, + struct writeback_control *wbc, get_block_t get_block) +{ + return __mpage_writepages(mapping, wbc, get_block, NULL); +} #endif -- cgit v1.2.3 From a664bf3d603dc3bdcf9ae47cc21e0daec706d7a5 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 26 Mar 2026 15:30:20 +0900 Subject: crypto: algif_aead - Revert to operating out-of-place This mostly reverts commit 72548b093ee3 except for the copying of the associated data. There is no benefit in operating in-place in algif_aead since the source and destination come from different mappings. Get rid of all the complexity added for in-place operation and just copy the AD directly. Fixes: 72548b093ee3 ("crypto: algif_aead - copy AAD from src to dst") Reported-by: Taeyang Lee <0wn@theori.io> Signed-off-by: Herbert Xu --- crypto/af_alg.c | 49 +++++------------------- crypto/algif_aead.c | 100 +++++++++--------------------------------------- crypto/algif_skcipher.c | 6 +-- include/crypto/if_alg.h | 5 +-- 4 files changed, 34 insertions(+), 126 deletions(-) (limited to 'include') diff --git a/crypto/af_alg.c b/crypto/af_alg.c index c2fd9cd86c5e..8e0199394984 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -637,15 +637,13 @@ static int af_alg_alloc_tsgl(struct sock *sk) /** * af_alg_count_tsgl - Count number of TX SG entries * - * The counting starts from the beginning of the SGL to @bytes. If - * an @offset is provided, the counting of the SG entries starts at the @offset. + * The counting starts from the beginning of the SGL to @bytes. * * @sk: socket of connection to user space * @bytes: Count the number of SG entries holding given number of bytes. - * @offset: Start the counting of SG entries from the given offset. 
* Return: Number of TX SG entries found given the constraints */ -unsigned int af_alg_count_tsgl(struct sock *sk, size_t bytes, size_t offset) +unsigned int af_alg_count_tsgl(struct sock *sk, size_t bytes) { const struct alg_sock *ask = alg_sk(sk); const struct af_alg_ctx *ctx = ask->private; @@ -660,25 +658,11 @@ unsigned int af_alg_count_tsgl(struct sock *sk, size_t bytes, size_t offset) const struct scatterlist *sg = sgl->sg; for (i = 0; i < sgl->cur; i++) { - size_t bytes_count; - - /* Skip offset */ - if (offset >= sg[i].length) { - offset -= sg[i].length; - bytes -= sg[i].length; - continue; - } - - bytes_count = sg[i].length - offset; - - offset = 0; sgl_count++; - - /* If we have seen requested number of bytes, stop */ - if (bytes_count >= bytes) + if (sg[i].length >= bytes) return sgl_count; - bytes -= bytes_count; + bytes -= sg[i].length; } } @@ -690,19 +674,14 @@ EXPORT_SYMBOL_GPL(af_alg_count_tsgl); * af_alg_pull_tsgl - Release the specified buffers from TX SGL * * If @dst is non-null, reassign the pages to @dst. The caller must release - * the pages. If @dst_offset is given only reassign the pages to @dst starting - * at the @dst_offset (byte). The caller must ensure that @dst is large - * enough (e.g. by using af_alg_count_tsgl with the same offset). + * the pages. * * @sk: socket of connection to user space * @used: Number of bytes to pull from TX SGL * @dst: If non-NULL, buffer is reassigned to dst SGL instead of releasing. The * caller must release the buffers in dst. - * @dst_offset: Reassign the TX SGL from given offset. All buffers before - * reaching the offset is released. */ -void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst, - size_t dst_offset) +void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst) { struct alg_sock *ask = alg_sk(sk); struct af_alg_ctx *ctx = ask->private; @@ -727,18 +706,10 @@ void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst, * SG entries in dst. */ if (dst) { - if (dst_offset >= plen) { - /* discard page before offset */ - dst_offset -= plen; - } else { - /* reassign page to dst after offset */ - get_page(page); - sg_set_page(dst + j, page, - plen - dst_offset, - sg[i].offset + dst_offset); - dst_offset = 0; - j++; - } + /* reassign page to dst after offset */ + get_page(page); + sg_set_page(dst + j, page, plen, sg[i].offset); + j++; } sg[i].length -= plen; diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index 79b016a899a1..dda15bb05e89 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -72,9 +71,8 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, struct alg_sock *pask = alg_sk(psk); struct af_alg_ctx *ctx = ask->private; struct crypto_aead *tfm = pask->private; - unsigned int i, as = crypto_aead_authsize(tfm); + unsigned int as = crypto_aead_authsize(tfm); struct af_alg_async_req *areq; - struct af_alg_tsgl *tsgl, *tmp; struct scatterlist *rsgl_src, *tsgl_src = NULL; int err = 0; size_t used = 0; /* [in] TX bufs to be en/decrypted */ @@ -154,23 +152,24 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, outlen -= less; } + /* + * Create a per request TX SGL for this request which tracks the + * SG entries from the global TX SGL. 
+ */ processed = used + ctx->aead_assoclen; - list_for_each_entry_safe(tsgl, tmp, &ctx->tsgl_list, list) { - for (i = 0; i < tsgl->cur; i++) { - struct scatterlist *process_sg = tsgl->sg + i; - - if (!(process_sg->length) || !sg_page(process_sg)) - continue; - tsgl_src = process_sg; - break; - } - if (tsgl_src) - break; - } - if (processed && !tsgl_src) { - err = -EFAULT; + areq->tsgl_entries = af_alg_count_tsgl(sk, processed); + if (!areq->tsgl_entries) + areq->tsgl_entries = 1; + areq->tsgl = sock_kmalloc(sk, array_size(sizeof(*areq->tsgl), + areq->tsgl_entries), + GFP_KERNEL); + if (!areq->tsgl) { + err = -ENOMEM; goto free; } + sg_init_table(areq->tsgl, areq->tsgl_entries); + af_alg_pull_tsgl(sk, processed, areq->tsgl); + tsgl_src = areq->tsgl; /* * Copy of AAD from source to destination @@ -179,76 +178,15 @@ static int _aead_recvmsg(struct socket *sock, struct msghdr *msg, * when user space uses an in-place cipher operation, the kernel * will copy the data as it does not see whether such in-place operation * is initiated. - * - * To ensure efficiency, the following implementation ensure that the - * ciphers are invoked to perform a crypto operation in-place. This - * is achieved by memory management specified as follows. */ /* Use the RX SGL as source (and destination) for crypto op. */ rsgl_src = areq->first_rsgl.sgl.sgt.sgl; - if (ctx->enc) { - /* - * Encryption operation - The in-place cipher operation is - * achieved by the following operation: - * - * TX SGL: AAD || PT - * | | - * | copy | - * v v - * RX SGL: AAD || PT || Tag - */ - memcpy_sglist(areq->first_rsgl.sgl.sgt.sgl, tsgl_src, - processed); - af_alg_pull_tsgl(sk, processed, NULL, 0); - } else { - /* - * Decryption operation - To achieve an in-place cipher - * operation, the following SGL structure is used: - * - * TX SGL: AAD || CT || Tag - * | | ^ - * | copy | | Create SGL link. - * v v | - * RX SGL: AAD || CT ----+ - */ - - /* Copy AAD || CT to RX SGL buffer for in-place operation. */ - memcpy_sglist(areq->first_rsgl.sgl.sgt.sgl, tsgl_src, outlen); - - /* Create TX SGL for tag and chain it to RX SGL. */ - areq->tsgl_entries = af_alg_count_tsgl(sk, processed, - processed - as); - if (!areq->tsgl_entries) - areq->tsgl_entries = 1; - areq->tsgl = sock_kmalloc(sk, array_size(sizeof(*areq->tsgl), - areq->tsgl_entries), - GFP_KERNEL); - if (!areq->tsgl) { - err = -ENOMEM; - goto free; - } - sg_init_table(areq->tsgl, areq->tsgl_entries); - - /* Release TX SGL, except for tag data and reassign tag data. */ - af_alg_pull_tsgl(sk, processed, areq->tsgl, processed - as); - - /* chain the areq TX SGL holding the tag with RX SGL */ - if (usedpages) { - /* RX SGL present */ - struct af_alg_sgl *sgl_prev = &areq->last_rsgl->sgl; - struct scatterlist *sg = sgl_prev->sgt.sgl; - - sg_unmark_end(sg + sgl_prev->sgt.nents - 1); - sg_chain(sg, sgl_prev->sgt.nents + 1, areq->tsgl); - } else - /* no RX SGL present (e.g. 
authentication only) */ - rsgl_src = areq->tsgl; - } + memcpy_sglist(rsgl_src, tsgl_src, ctx->aead_assoclen); /* Initialize the crypto operation */ - aead_request_set_crypt(&areq->cra_u.aead_req, rsgl_src, + aead_request_set_crypt(&areq->cra_u.aead_req, tsgl_src, areq->first_rsgl.sgl.sgt.sgl, used, ctx->iv); aead_request_set_ad(&areq->cra_u.aead_req, ctx->aead_assoclen); aead_request_set_tfm(&areq->cra_u.aead_req, tfm); @@ -450,7 +388,7 @@ static void aead_sock_destruct(struct sock *sk) struct crypto_aead *tfm = pask->private; unsigned int ivlen = crypto_aead_ivsize(tfm); - af_alg_pull_tsgl(sk, ctx->used, NULL, 0); + af_alg_pull_tsgl(sk, ctx->used, NULL); sock_kzfree_s(sk, ctx->iv, ivlen); sock_kfree_s(sk, ctx, ctx->len); af_alg_release_parent(sk); diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 125d395c5e00..82735e51be10 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -138,7 +138,7 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg, * Create a per request TX SGL for this request which tracks the * SG entries from the global TX SGL. */ - areq->tsgl_entries = af_alg_count_tsgl(sk, len, 0); + areq->tsgl_entries = af_alg_count_tsgl(sk, len); if (!areq->tsgl_entries) areq->tsgl_entries = 1; areq->tsgl = sock_kmalloc(sk, array_size(sizeof(*areq->tsgl), @@ -149,7 +149,7 @@ static int _skcipher_recvmsg(struct socket *sock, struct msghdr *msg, goto free; } sg_init_table(areq->tsgl, areq->tsgl_entries); - af_alg_pull_tsgl(sk, len, areq->tsgl, 0); + af_alg_pull_tsgl(sk, len, areq->tsgl); /* Initialize the crypto operation */ skcipher_request_set_tfm(&areq->cra_u.skcipher_req, tfm); @@ -363,7 +363,7 @@ static void skcipher_sock_destruct(struct sock *sk) struct alg_sock *pask = alg_sk(psk); struct crypto_skcipher *tfm = pask->private; - af_alg_pull_tsgl(sk, ctx->used, NULL, 0); + af_alg_pull_tsgl(sk, ctx->used, NULL); sock_kzfree_s(sk, ctx->iv, crypto_skcipher_ivsize(tfm)); if (ctx->state) sock_kzfree_s(sk, ctx->state, crypto_skcipher_statesize(tfm)); diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h index 107b797c33ec..0cc8fa749f68 100644 --- a/include/crypto/if_alg.h +++ b/include/crypto/if_alg.h @@ -230,9 +230,8 @@ static inline bool af_alg_readable(struct sock *sk) return PAGE_SIZE <= af_alg_rcvbuf(sk); } -unsigned int af_alg_count_tsgl(struct sock *sk, size_t bytes, size_t offset); -void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst, - size_t dst_offset); +unsigned int af_alg_count_tsgl(struct sock *sk, size_t bytes); +void af_alg_pull_tsgl(struct sock *sk, size_t used, struct scatterlist *dst); void af_alg_wmem_wakeup(struct sock *sk); int af_alg_wait_for_data(struct sock *sk, unsigned flags, unsigned min); int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size, -- cgit v1.2.3 From c76fef7dcd9372e3476d4df5e0a72ed5919a814b Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 31 Mar 2026 23:10:20 +0200 Subject: bpf: Fix grace period wait for tracepoint bpf_link Recently, tracepoints were switched from using disabled preemption (which acts as an RCU read section) to SRCU-fast when they are not faultable. This means that to do a proper grace period wait for programs running in such tracepoints, we must use SRCU's grace period wait. This is only for non-faultable tracepoints; faultable ones continue using RCU Tasks Trace. However, bpf_link_free() currently does call_rcu() for all cases when the link is non-sleepable (hence, for tracepoints, non-faultable).
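To sketch the flavor mismatch (illustrative pseudocode of the idea, using the plain SRCU reader API rather than the SRCU-fast variant the tracepoint code actually uses):

	/* Reader side: the BPF program runs under tracepoint_srcu. */
	int idx = srcu_read_lock(&tracepoint_srcu);
	/* ... tracepoint invokes the BPF program ... */
	srcu_read_unlock(&tracepoint_srcu, idx);

	/* A classic RCU grace period does not wait for the reader above,
	 * so the updater must wait on the matching flavor before freeing: */
	call_srcu(&tracepoint_srcu, &link->rcu, bpf_link_defer_dealloc_rcu_gp);
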
Fix this by doing a call_srcu() grace period wait. As far as RCU Tasks Trace gp -> RCU gp chaining is concerned, it is deemed unnecessary for tracepoint programs. The link and program are now accessed under either RCU Tasks Trace protection or SRCU-fast protection. The earlier logic of chaining both RCU Tasks Trace and RCU gp waits was to generalize the logic, even if it conceded an extra RCU gp wait; however, that is unnecessary for tracepoints even before this change. In practice no cost was paid since rcu_trace_implies_rcu_gp() was always true. Hence we need not chain any RCU gp after the SRCU gp. For instance, in the non-faultable raw tracepoint, the RCU read section of the program in __bpf_trace_run() is enclosed in the SRCU gp; likewise, for the faultable raw tracepoint, the program is under RCU Tasks Trace protection. Hence, the outermost scope can be waited upon to ensure correctness. Also, sleepable programs cannot be attached to non-faultable tracepoints, so whenever the program or link is sleepable, only RCU Tasks Trace protection is being used for the link and prog. Fixes: a46023d5616e ("tracing: Guard __DECLARE_TRACE() use of __DO_TRACE_CALL() with SRCU-fast") Reviewed-by: Sun Jian Reviewed-by: Puranjay Mohan Acked-by: Andrii Nakryiko Signed-off-by: Kumar Kartikeya Dwivedi Acked-by: Steven Rostedt (Google) Link: https://lore.kernel.org/r/20260331211021.1632902-2-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 4 ++++ include/linux/tracepoint.h | 20 ++++++++++++++++++++ kernel/bpf/syscall.c | 25 +++++++++++++++++++++++-- 3 files changed, 47 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 05b34a6355b0..35b1e25bd104 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1854,6 +1854,10 @@ struct bpf_link_ops { * target hook is sleepable, we'll go through tasks trace RCU GP and * then "classic" RCU GP; this need for chaining tasks trace and * classic RCU GPs is designated by setting bpf_link->sleepable flag + * + * For non-sleepable tracepoint links we go through SRCU gp instead, + * since RCU is not used in that case. Sleepable tracepoints still + * follow the scheme above. */ void (*dealloc_deferred)(struct bpf_link *link); int (*detach)(struct bpf_link *link); diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 22ca1c8b54f3..1d7f29f5e901 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -122,6 +122,22 @@ static inline bool tracepoint_is_faultable(struct tracepoint *tp) { return tp->ext && tp->ext->faultable; } +/* + * Run RCU callback with the appropriate grace period wait for non-faultable + * tracepoints, e.g., those used in atomic context. + */ +static inline void call_tracepoint_unregister_atomic(struct rcu_head *rcu, rcu_callback_t func) +{ + call_srcu(&tracepoint_srcu, rcu, func); +} +/* + * Run RCU callback with the appropriate grace period wait for faultable + * tracepoints, e.g., those used in syscall context.
+ */ +static inline void call_tracepoint_unregister_syscall(struct rcu_head *rcu, rcu_callback_t func) +{ + call_rcu_tasks_trace(rcu, func); +} #else static inline void tracepoint_synchronize_unregister(void) { } @@ -129,6 +145,10 @@ static inline bool tracepoint_is_faultable(struct tracepoint *tp) { return false; } +static inline void call_tracepoint_unregister_atomic(struct rcu_head *rcu, rcu_callback_t func) +{ } +static inline void call_tracepoint_unregister_syscall(struct rcu_head *rcu, rcu_callback_t func) +{ } #endif #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 274039e36465..700938782bed 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3261,6 +3261,18 @@ static void bpf_link_defer_dealloc_rcu_gp(struct rcu_head *rcu) bpf_link_dealloc(link); } +static bool bpf_link_is_tracepoint(struct bpf_link *link) +{ + /* + * Only these combinations support a tracepoint bpf_link. + * BPF_LINK_TYPE_TRACING raw_tp progs are hardcoded to use + * bpf_raw_tp_link_lops and thus dealloc_deferred(), see + * bpf_raw_tp_link_attach(). + */ + return link->type == BPF_LINK_TYPE_RAW_TRACEPOINT || + (link->type == BPF_LINK_TYPE_TRACING && link->attach_type == BPF_TRACE_RAW_TP); +} + static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu) { if (rcu_trace_implies_rcu_gp()) @@ -3279,16 +3291,25 @@ static void bpf_link_free(struct bpf_link *link) if (link->prog) ops->release(link); if (ops->dealloc_deferred) { - /* Schedule BPF link deallocation, which will only then + /* + * Schedule BPF link deallocation, which will only then * trigger putting BPF program refcount. * If underlying BPF program is sleepable or BPF link's target * attach hookpoint is sleepable or otherwise requires RCU GPs * to ensure link and its underlying BPF program is not * reachable anymore, we need to first wait for RCU tasks - * trace sync, and then go through "classic" RCU grace period + * trace sync, and then go through "classic" RCU grace period. + * + * For tracepoint BPF links, we need to go through SRCU grace + * period wait instead when non-faultable tracepoint is used. We + * don't need to chain SRCU grace period waits, however, for the + * faultable case, since it exclusively uses RCU Tasks Trace. */ if (link->sleepable || (link->prog && link->prog->sleepable)) call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp); + /* We need to do a SRCU grace period wait for non-faultable tracepoint BPF links. 
*/ + else if (bpf_link_is_tracepoint(link)) + call_tracepoint_unregister_atomic(&link->rcu, bpf_link_defer_dealloc_rcu_gp); else call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp); } else if (ops->dealloc) { -- cgit v1.2.3 From 8c0ef7b56d6bbbc53f2d43d99c195144f01b0775 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 11 Mar 2026 22:14:00 -0700 Subject: lis3lv02d: fix kernel-doc warnings Use the correct kernel-doc format to avoid kernel-doc warnings: Warning: include/linux/lis3lv02d.h:125 struct member 'st_min_limits' not described in 'lis3lv02d_platform_data' Warning: include/linux/lis3lv02d.h:125 struct member 'st_max_limits' not described in 'lis3lv02d_platform_data' Signed-off-by: Randy Dunlap Link: https://patch.msgid.link/20260312051400.682991-1-rdunlap@infradead.org Signed-off-by: Greg Kroah-Hartman --- include/linux/lis3lv02d.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/lis3lv02d.h b/include/linux/lis3lv02d.h index b72b8cdba765..feb60ba4e30e 100644 --- a/include/linux/lis3lv02d.h +++ b/include/linux/lis3lv02d.h @@ -30,8 +30,8 @@ * @default_rate: Default sampling rate. 0 means reset default * @setup_resources: Interrupt line setup call back function * @release_resources: Interrupt line release call back function - * @st_min_limits[3]: Selftest acceptance minimum values - * @st_max_limits[3]: Selftest acceptance maximum values + * @st_min_limits: Selftest acceptance minimum values (x, y, z) + * @st_max_limits: Selftest acceptance maximum values (x, y, z) * @irq2: Irq line 2 number * * Platform data is used to setup the sensor chip. Meaning of the different -- cgit v1.2.3 From b7e8590987aa94c9dc51518fad0e58cb887b1db5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 30 Mar 2026 14:16:34 +0200 Subject: netfilter: ipset: use nla_strcmp for IPSET_ATTR_NAME attr IPSET_ATTR_NAME and IPSET_ATTR_NAMEREF are of NLA_STRING type, so they cannot be treated like C strings. They either have to be switched to NLA_NUL_STRING, or the compare operations need to use the nla functions.
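A sketch of the difference (illustrative only; name_matches() is a made-up helper, not code from this patch):

	/* An NLA_STRING payload is nla_len(attr) bytes with no guaranteed
	 * NUL terminator, so strcmp(nla_data(attr), s->name) may read past
	 * the end of the attribute. */
	static bool name_matches(const struct nlattr *attr, const struct ip_set *s)
	{
		/* nla_strcmp() bounds the comparison by the attribute
		 * length and rejects a longer s->name. */
		return nla_strcmp(attr, s->name) == 0;
	}
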
Fixes: f830837f0eed ("netfilter: ipset: list:set set type support") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 2 +- net/netfilter/ipset/ip_set_core.c | 4 ++-- net/netfilter/ipset/ip_set_list_set.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index e9f4f845d760..b98331572ad2 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -309,7 +309,7 @@ enum { /* register and unregister set references */ extern ip_set_id_t ip_set_get_byname(struct net *net, - const char *name, struct ip_set **set); + const struct nlattr *name, struct ip_set **set); extern void ip_set_put_byindex(struct net *net, ip_set_id_t index); extern void ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name); extern ip_set_id_t ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index); diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index a2fe711cb5e3..d0c9fe59c67d 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -821,7 +821,7 @@ EXPORT_SYMBOL_GPL(ip_set_del); * */ ip_set_id_t -ip_set_get_byname(struct net *net, const char *name, struct ip_set **set) +ip_set_get_byname(struct net *net, const struct nlattr *name, struct ip_set **set) { ip_set_id_t i, index = IPSET_INVALID_ID; struct ip_set *s; @@ -830,7 +830,7 @@ ip_set_get_byname(struct net *net, const char *name, struct ip_set **set) rcu_read_lock(); for (i = 0; i < inst->ip_set_max; i++) { s = rcu_dereference(inst->ip_set_list)[i]; - if (s && STRNCMP(s->name, name)) { + if (s && nla_strcmp(name, s->name) == 0) { __ip_set_get(s); index = i; *set = s; diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 98b91b34c314..1cef84f15e8c 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -367,7 +367,7 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; - e.id = ip_set_get_byname(map->net, nla_data(tb[IPSET_ATTR_NAME]), &s); + e.id = ip_set_get_byname(map->net, tb[IPSET_ATTR_NAME], &s); if (e.id == IPSET_INVALID_ID) return -IPSET_ERR_NAME; /* "Loop detection" */ @@ -389,7 +389,7 @@ list_set_uadt(struct ip_set *set, struct nlattr *tb[], if (tb[IPSET_ATTR_NAMEREF]) { e.refid = ip_set_get_byname(map->net, - nla_data(tb[IPSET_ATTR_NAMEREF]), + tb[IPSET_ATTR_NAMEREF], &s); if (e.refid == IPSET_INVALID_ID) { ret = -IPSET_ERR_NAMEREF; -- cgit v1.2.3 From bd3d245b0fef571f93504904df62b8865b1c0d34 Mon Sep 17 00:00:00 2001 From: Guan-Yu Lin Date: Wed, 1 Apr 2026 12:32:17 +0000 Subject: usb: core: use dedicated spinlock for offload state Replace the coarse USB device lock with a dedicated offload_lock spinlock to reduce contention during offload operations. Use offload_pm_locked to synchronize with PM transitions and replace the legacy offload_at_suspend flag. Optimize usb_offload_get/put by switching from auto-resume/suspend to pm_runtime_get_if_active(). This ensures offload state is only modified when the device is already active, avoiding unnecessary power transitions. 
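The core of that idiom, condensed from the usb_offload_get() hunk below (error paths trimmed):

	/* Take a runtime-PM reference only if the device is already
	 * active; never resume it just to touch the offload counter. */
	if (pm_runtime_get_if_active(&udev->dev) != 1)
		return -EBUSY;

	spin_lock(&udev->offload_lock);
	if (!udev->offload_pm_locked)	/* no PM transition in flight */
		udev->offload_usage++;
	spin_unlock(&udev->offload_lock);

	pm_runtime_put_autosuspend(&udev->dev);
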
Cc: stable Fixes: ef82a4803aab ("xhci: sideband: add api to trace sideband usage") Signed-off-by: Guan-Yu Lin Tested-by: Hailong Liu Acked-by: Mathias Nyman Link: https://patch.msgid.link/20260401123238.3790062-2-guanyulin@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/driver.c | 23 +++++---- drivers/usb/core/offload.c | 102 ++++++++++++++++++++++----------------- drivers/usb/core/usb.c | 1 + drivers/usb/host/xhci-sideband.c | 4 +- include/linux/usb.h | 10 +++- 5 files changed, 84 insertions(+), 56 deletions(-) (limited to 'include') diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c index 2574e65bc640..f63004417058 100644 --- a/drivers/usb/core/driver.c +++ b/drivers/usb/core/driver.c @@ -1415,14 +1415,16 @@ static int usb_suspend_both(struct usb_device *udev, pm_message_t msg) int status = 0; int i = 0, n = 0; struct usb_interface *intf; + bool offload_active = false; if (udev->state == USB_STATE_NOTATTACHED || udev->state == USB_STATE_SUSPENDED) goto done; + usb_offload_set_pm_locked(udev, true); if (msg.event == PM_EVENT_SUSPEND && usb_offload_check(udev)) { dev_dbg(&udev->dev, "device offloaded, skip suspend.\n"); - udev->offload_at_suspend = 1; + offload_active = true; } /* Suspend all the interfaces and then udev itself */ @@ -1436,8 +1438,7 @@ static int usb_suspend_both(struct usb_device *udev, pm_message_t msg) * interrupt urbs, allowing interrupt events to be * handled during system suspend. */ - if (udev->offload_at_suspend && - intf->needs_remote_wakeup) { + if (offload_active && intf->needs_remote_wakeup) { dev_dbg(&intf->dev, "device offloaded, skip suspend.\n"); continue; @@ -1452,7 +1453,7 @@ static int usb_suspend_both(struct usb_device *udev, pm_message_t msg) } } if (status == 0) { - if (!udev->offload_at_suspend) + if (!offload_active) status = usb_suspend_device(udev, msg); /* @@ -1498,7 +1499,7 @@ static int usb_suspend_both(struct usb_device *udev, pm_message_t msg) */ } else { udev->can_submit = 0; - if (!udev->offload_at_suspend) { + if (!offload_active) { for (i = 0; i < 16; ++i) { usb_hcd_flush_endpoint(udev, udev->ep_out[i]); usb_hcd_flush_endpoint(udev, udev->ep_in[i]); @@ -1507,6 +1508,8 @@ static int usb_suspend_both(struct usb_device *udev, pm_message_t msg) } done: + if (status != 0) + usb_offload_set_pm_locked(udev, false); dev_vdbg(&udev->dev, "%s: status %d\n", __func__, status); return status; } @@ -1536,16 +1539,19 @@ static int usb_resume_both(struct usb_device *udev, pm_message_t msg) int status = 0; int i; struct usb_interface *intf; + bool offload_active = false; if (udev->state == USB_STATE_NOTATTACHED) { status = -ENODEV; goto done; } udev->can_submit = 1; + if (msg.event == PM_EVENT_RESUME) + offload_active = usb_offload_check(udev); /* Resume the device */ if (udev->state == USB_STATE_SUSPENDED || udev->reset_resume) { - if (!udev->offload_at_suspend) + if (!offload_active) status = usb_resume_device(udev, msg); else dev_dbg(&udev->dev, @@ -1562,8 +1568,7 @@ static int usb_resume_both(struct usb_device *udev, pm_message_t msg) * pending interrupt urbs, allowing interrupt events * to be handled during system suspend. 
*/ - if (udev->offload_at_suspend && - intf->needs_remote_wakeup) { + if (offload_active && intf->needs_remote_wakeup) { dev_dbg(&intf->dev, "device offloaded, skip resume.\n"); continue; @@ -1572,11 +1577,11 @@ static int usb_resume_both(struct usb_device *udev, pm_message_t msg) udev->reset_resume); } } - udev->offload_at_suspend = 0; usb_mark_last_busy(udev); done: dev_vdbg(&udev->dev, "%s: status %d\n", __func__, status); + usb_offload_set_pm_locked(udev, false); if (!status) udev->reset_resume = 0; return status; diff --git a/drivers/usb/core/offload.c b/drivers/usb/core/offload.c index 7c699f1b8d2b..9db3cfedd29c 100644 --- a/drivers/usb/core/offload.c +++ b/drivers/usb/core/offload.c @@ -25,33 +25,30 @@ */ int usb_offload_get(struct usb_device *udev) { - int ret; + int ret = 0; - usb_lock_device(udev); - if (udev->state == USB_STATE_NOTATTACHED) { - usb_unlock_device(udev); + if (!usb_get_dev(udev)) return -ENODEV; - } - if (udev->state == USB_STATE_SUSPENDED || - udev->offload_at_suspend) { - usb_unlock_device(udev); - return -EBUSY; + if (pm_runtime_get_if_active(&udev->dev) != 1) { + ret = -EBUSY; + goto err_rpm; } - /* - * offload_usage could only be modified when the device is active, since - * it will alter the suspend flow of the device. - */ - ret = usb_autoresume_device(udev); - if (ret < 0) { - usb_unlock_device(udev); - return ret; + spin_lock(&udev->offload_lock); + + if (udev->offload_pm_locked) { + ret = -EAGAIN; + goto err; } udev->offload_usage++; - usb_autosuspend_device(udev); - usb_unlock_device(udev); + +err: + spin_unlock(&udev->offload_lock); + pm_runtime_put_autosuspend(&udev->dev); +err_rpm: + usb_put_dev(udev); return ret; } @@ -69,35 +66,32 @@ EXPORT_SYMBOL_GPL(usb_offload_get); */ int usb_offload_put(struct usb_device *udev) { - int ret; + int ret = 0; - usb_lock_device(udev); - if (udev->state == USB_STATE_NOTATTACHED) { - usb_unlock_device(udev); + if (!usb_get_dev(udev)) return -ENODEV; - } - if (udev->state == USB_STATE_SUSPENDED || - udev->offload_at_suspend) { - usb_unlock_device(udev); - return -EBUSY; + if (pm_runtime_get_if_active(&udev->dev) != 1) { + ret = -EBUSY; + goto err_rpm; } - /* - * offload_usage could only be modified when the device is active, since - * it will alter the suspend flow of the device. - */ - ret = usb_autoresume_device(udev); - if (ret < 0) { - usb_unlock_device(udev); - return ret; + spin_lock(&udev->offload_lock); + + if (udev->offload_pm_locked) { + ret = -EAGAIN; + goto err; } /* Drop the count when it wasn't 0, ignore the operation otherwise. */ if (udev->offload_usage) udev->offload_usage--; - usb_autosuspend_device(udev); - usb_unlock_device(udev); + +err: + spin_unlock(&udev->offload_lock); + pm_runtime_put_autosuspend(&udev->dev); +err_rpm: + usb_put_dev(udev); return ret; } @@ -112,25 +106,47 @@ EXPORT_SYMBOL_GPL(usb_offload_put); * management. * * The caller must hold @udev's device lock. In addition, the caller should - * ensure downstream usb devices are all either suspended or marked as - * "offload_at_suspend" to ensure the correctness of the return value. + * ensure the device itself and the downstream usb devices are all marked as + * "offload_pm_locked" to ensure the correctness of the return value. * * Returns true on any offload activity, false otherwise. 
*/ bool usb_offload_check(struct usb_device *udev) __must_hold(&udev->dev->mutex) { struct usb_device *child; - bool active; + bool active = false; int port1; + if (udev->offload_usage) + return true; + usb_hub_for_each_child(udev, port1, child) { usb_lock_device(child); active = usb_offload_check(child); usb_unlock_device(child); + if (active) - return true; + break; } - return !!udev->offload_usage; + return active; } EXPORT_SYMBOL_GPL(usb_offload_check); + +/** + * usb_offload_set_pm_locked - set the PM lock state of a USB device + * @udev: the USB device to modify + * @locked: the new lock state + * + * Setting @locked to true prevents offload_usage from being modified. This + * ensures that offload activities cannot be started or stopped during critical + * power management transitions, maintaining a stable state for the duration + * of the transition. + */ +void usb_offload_set_pm_locked(struct usb_device *udev, bool locked) +{ + spin_lock(&udev->offload_lock); + udev->offload_pm_locked = locked; + spin_unlock(&udev->offload_lock); +} +EXPORT_SYMBOL_GPL(usb_offload_set_pm_locked); diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index e9a10a33534c..df166cafe106 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -671,6 +671,7 @@ struct usb_device *usb_alloc_dev(struct usb_device *parent, set_dev_node(&dev->dev, dev_to_node(bus->sysdev)); dev->state = USB_STATE_ATTACHED; dev->lpm_disable_count = 1; + spin_lock_init(&dev->offload_lock); dev->offload_usage = 0; atomic_set(&dev->urbnum, 0); diff --git a/drivers/usb/host/xhci-sideband.c b/drivers/usb/host/xhci-sideband.c index abbcc0e44f1b..54284d29d201 100644 --- a/drivers/usb/host/xhci-sideband.c +++ b/drivers/usb/host/xhci-sideband.c @@ -291,8 +291,8 @@ EXPORT_SYMBOL_GPL(xhci_sideband_get_event_buffer); * Allow other drivers, such as usb controller driver, to check if there are * any sideband activity on the host controller. This information could be used * for power management or other forms of resource management. The caller should - * ensure downstream usb devices are all either suspended or marked as - * "offload_at_suspend" to ensure the correctness of the return value. + * ensure downstream usb devices are all marked as "offload_pm_locked" to ensure + * the correctness of the return value. * * Returns true on any active sideband existence, false otherwise. */ diff --git a/include/linux/usb.h b/include/linux/usb.h index 04277af4bb9d..4aab20015851 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -21,6 +21,7 @@ #include /* for struct completion */ #include /* for current && schedule_timeout */ #include /* for struct mutex */ +#include /* for spinlock_t */ #include /* for runtime PM */ struct usb_device; @@ -636,8 +637,9 @@ struct usb3_lpm_parameters { * @do_remote_wakeup: remote wakeup should be enabled * @reset_resume: needs reset instead of resume * @port_is_suspended: the upstream port is suspended (L2 or U3) - * @offload_at_suspend: offload activities during suspend is enabled. + * @offload_pm_locked: prevents offload_usage changes during PM transitions. * @offload_usage: number of offload activities happening on this usb device. + * @offload_lock: protects offload_usage and offload_pm_locked * @slot_id: Slot ID assigned by xHCI * @l1_params: best effor service latency for USB2 L1 LPM state, and L1 timeout. * @u1_params: exit latencies for USB3 U1 LPM state, and hub-initiated timeout. 
@@ -726,8 +728,9 @@ struct usb_device { unsigned do_remote_wakeup:1; unsigned reset_resume:1; unsigned port_is_suspended:1; - unsigned offload_at_suspend:1; + unsigned offload_pm_locked:1; int offload_usage; + spinlock_t offload_lock; enum usb_link_tunnel_mode tunnel_mode; struct device_link *usb4_link; @@ -849,6 +852,7 @@ static inline void usb_mark_last_busy(struct usb_device *udev) int usb_offload_get(struct usb_device *udev); int usb_offload_put(struct usb_device *udev); bool usb_offload_check(struct usb_device *udev); +void usb_offload_set_pm_locked(struct usb_device *udev, bool locked); #else static inline int usb_offload_get(struct usb_device *udev) @@ -857,6 +861,8 @@ static inline int usb_offload_put(struct usb_device *udev) { return 0; } static inline bool usb_offload_check(struct usb_device *udev) { return false; } +static inline void usb_offload_set_pm_locked(struct usb_device *udev, bool locked) +{ } #endif extern int usb_disable_lpm(struct usb_device *udev); -- cgit v1.2.3
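For reference, a hypothetical caller of the API above (start_offload()/stop_offload() are invented names, not part of any patch in this series):

	int start_offload(struct usb_device *udev)
	{
		/* Fails with -ENODEV/-EBUSY/-EAGAIN instead of forcing a
		 * resume or racing a PM transition. */
		int err = usb_offload_get(udev);

		if (err)
			return err;
		/* ... hand the endpoints over to the offload engine ... */
		return 0;
	}

	void stop_offload(struct usb_device *udev)
	{
		/* ... reclaim the endpoints from the offload engine ... */
		usb_offload_put(udev);
	}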