From 1211f3b21c2aa0d22d8d7f050e3a5930a91cd0e4 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 25 Mar 2024 07:21:02 -1000
Subject: workqueue: Preserve OFFQ bits in cancel[_sync] paths

The cancel[_sync] paths acquire and release WORK_STRUCT_PENDING, and
manipulate WORK_OFFQ_CANCELING. However, they assume that all the OFFQ bit
values except for the pool ID are statically known and don't preserve them,
which is not wrong in the current code as the pool ID and CANCELING are the
only information carried. However, the planned disable/enable support will
add more fields and need them to be preserved.

This patch updates work data handling so that only the bits which need
updating are updated.

- struct work_offq_data is added along with work_offqd_unpack() and
  work_offqd_pack_flags() to help manipulating multiple fields contained in
  work->data. Note that the helpers look a bit silly right now as there
  isn't that much to pack. The next patch will add more.

- mark_work_canceling() which is used only by __cancel_work_sync() is
  replaced by open-coded usage of work_offq_data and
  set_work_pool_and_keep_pending() in __cancel_work_sync().

- __cancel_work[_sync]() uses offq_data helpers to preserve other OFFQ bits
  when clearing WORK_STRUCT_PENDING and WORK_OFFQ_CANCELING at the end.

- This removes all users of get_work_pool_id() which is dropped. Note that
  get_work_pool_id() could handle both WORK_STRUCT_PWQ and !WORK_STRUCT_PWQ
  cases; however, it was only being called after try_to_grab_pending()
  succeeded, in which case WORK_STRUCT_PWQ is never set and thus it's safe
  to use work_offqd_unpack() instead.

No behavior changes intended.

Signed-off-by: Tejun Heo
Reviewed-by: Lai Jiangshan
---
 include/linux/workqueue.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux/workqueue.h')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 158784dd189a..ae7ae4a51499 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -97,6 +97,7 @@ enum wq_misc_consts {
 /* Convenience constants - of type 'unsigned long', not 'enum'! */
 #define WORK_OFFQ_CANCELING (1ul << WORK_OFFQ_CANCELING_BIT)
+#define WORK_OFFQ_FLAG_MASK (((1ul << WORK_OFFQ_FLAG_BITS) - 1) << WORK_OFFQ_FLAG_SHIFT)
 #define WORK_OFFQ_POOL_NONE ((1ul << WORK_OFFQ_POOL_BITS) - 1)
 #define WORK_STRUCT_NO_POOL (WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT)
 #define WORK_STRUCT_PWQ_MASK (~((1ul << WORK_STRUCT_PWQ_SHIFT) - 1))
-- cgit v1.2.3

From 86898fa6b8cd942505860556f3a0bf52eae57fe8 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 25 Mar 2024 07:21:03 -1000
Subject: workqueue: Implement disable/enable for (delayed) work items

While (delayed) work items could be flushed and canceled, there was no way
to prevent them from being queued in the future. While this didn't lead to
functional deficiencies, it sometimes required a bit more effort from the
workqueue users to e.g. sequence shutdown steps with more care.

Workqueue is currently in the process of replacing tasklet which does
support disabling and enabling. The feature is used relatively widely to,
for example, temporarily suppress the main path while a control plane
operation (reset or config change) is in progress.

To enable easy conversion of tasklet users and as it seems like an
inherently useful feature, this patch implements disabling and enabling of
work items.

- A work item carries a 16-bit disable count in work->data while not
  queued. The access to the count is synchronized by the PENDING bit like
  all other parts of work->data.
- If the count is non-zero, the work item cannot be queued. Any attempt to queue the work item fails and returns %false. - disable_work[_sync](), enable_work(), disable_delayed_work[_sync]() and enable_delayed_work() are added. v3: enable_work() was using local_irq_enable() instead of local_irq_restore() to undo IRQ-disable by work_grab_pending(). This is awkward now and will become incorrect as enable_work() will later be used from IRQ context too. (Lai) v2: Lai noticed that queue_work_node() wasn't checking the disable count. Fixed. queue_rcu_work() is updated to trigger warning if the inner work item is disabled. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- include/linux/workqueue.h | 18 ++++- kernel/workqueue.c | 177 +++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 182 insertions(+), 13 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index ae7ae4a51499..bd80e66298a0 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -51,20 +51,23 @@ enum work_bits { * data contains off-queue information when !WORK_STRUCT_PWQ. * * MSB - * [ pool ID ] [ OFFQ flags ] [ STRUCT flags ] - * 1 bit 4 or 5 bits + * [ pool ID ] [ disable depth ] [ OFFQ flags ] [ STRUCT flags ] + * 16 bits 1 bit 4 or 5 bits */ WORK_OFFQ_FLAG_SHIFT = WORK_STRUCT_FLAG_BITS, WORK_OFFQ_CANCELING_BIT = WORK_OFFQ_FLAG_SHIFT, WORK_OFFQ_FLAG_END, WORK_OFFQ_FLAG_BITS = WORK_OFFQ_FLAG_END - WORK_OFFQ_FLAG_SHIFT, + WORK_OFFQ_DISABLE_SHIFT = WORK_OFFQ_FLAG_SHIFT + WORK_OFFQ_FLAG_BITS, + WORK_OFFQ_DISABLE_BITS = 16, + /* * When a work item is off queue, the high bits encode off-queue flags * and the last pool it was on. Cap pool ID to 31 bits and use the * highest number to indicate that no pool is associated. */ - WORK_OFFQ_POOL_SHIFT = WORK_OFFQ_FLAG_SHIFT + WORK_OFFQ_FLAG_BITS, + WORK_OFFQ_POOL_SHIFT = WORK_OFFQ_DISABLE_SHIFT + WORK_OFFQ_DISABLE_BITS, WORK_OFFQ_LEFT = BITS_PER_LONG - WORK_OFFQ_POOL_SHIFT, WORK_OFFQ_POOL_BITS = WORK_OFFQ_LEFT <= 31 ? WORK_OFFQ_LEFT : 31, }; @@ -98,6 +101,7 @@ enum wq_misc_consts { /* Convenience constants - of type 'unsigned long', not 'enum'! 
*/ #define WORK_OFFQ_CANCELING (1ul << WORK_OFFQ_CANCELING_BIT) #define WORK_OFFQ_FLAG_MASK (((1ul << WORK_OFFQ_FLAG_BITS) - 1) << WORK_OFFQ_FLAG_SHIFT) +#define WORK_OFFQ_DISABLE_MASK (((1ul << WORK_OFFQ_DISABLE_BITS) - 1) << WORK_OFFQ_DISABLE_SHIFT) #define WORK_OFFQ_POOL_NONE ((1ul << WORK_OFFQ_POOL_BITS) - 1) #define WORK_STRUCT_NO_POOL (WORK_OFFQ_POOL_NONE << WORK_OFFQ_POOL_SHIFT) #define WORK_STRUCT_PWQ_MASK (~((1ul << WORK_STRUCT_PWQ_SHIFT) - 1)) @@ -560,6 +564,14 @@ extern bool flush_delayed_work(struct delayed_work *dwork); extern bool cancel_delayed_work(struct delayed_work *dwork); extern bool cancel_delayed_work_sync(struct delayed_work *dwork); +extern bool disable_work(struct work_struct *work); +extern bool disable_work_sync(struct work_struct *work); +extern bool enable_work(struct work_struct *work); + +extern bool disable_delayed_work(struct delayed_work *dwork); +extern bool disable_delayed_work_sync(struct delayed_work *dwork); +extern bool enable_delayed_work(struct delayed_work *dwork); + extern bool flush_rcu_work(struct rcu_work *rwork); extern void workqueue_set_max_active(struct workqueue_struct *wq, diff --git a/kernel/workqueue.c b/kernel/workqueue.c index d8f37cfa9935..5c53dde877fd 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -99,6 +99,7 @@ enum worker_flags { enum work_cancel_flags { WORK_CANCEL_DELAYED = 1 << 0, /* canceling a delayed_work */ + WORK_CANCEL_DISABLE = 1 << 1, /* canceling to disable */ }; enum wq_internal_consts { @@ -394,6 +395,7 @@ struct wq_pod_type { struct work_offq_data { u32 pool_id; + u32 disable; u32 flags; }; @@ -908,12 +910,15 @@ static void work_offqd_unpack(struct work_offq_data *offqd, unsigned long data) offqd->pool_id = shift_and_mask(data, WORK_OFFQ_POOL_SHIFT, WORK_OFFQ_POOL_BITS); + offqd->disable = shift_and_mask(data, WORK_OFFQ_DISABLE_SHIFT, + WORK_OFFQ_DISABLE_BITS); offqd->flags = data & WORK_OFFQ_FLAG_MASK; } static unsigned long work_offqd_pack_flags(struct work_offq_data *offqd) { - return (unsigned long)offqd->flags; + return ((unsigned long)offqd->disable << WORK_OFFQ_DISABLE_SHIFT) | + ((unsigned long)offqd->flags); } static bool work_is_canceling(struct work_struct *work) @@ -2408,6 +2413,21 @@ out: rcu_read_unlock(); } +static bool clear_pending_if_disabled(struct work_struct *work) +{ + unsigned long data = *work_data_bits(work); + struct work_offq_data offqd; + + if (likely((data & WORK_STRUCT_PWQ) || + !(data & WORK_OFFQ_DISABLE_MASK))) + return false; + + work_offqd_unpack(&offqd, data); + set_work_pool_and_clear_pending(work, offqd.pool_id, + work_offqd_pack_flags(&offqd)); + return true; +} + /** * queue_work_on - queue work on specific cpu * @cpu: CPU number to execute work on @@ -2430,7 +2450,8 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq, local_irq_save(irq_flags); - if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { + if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)) && + !clear_pending_if_disabled(work)) { __queue_work(cpu, wq, work); ret = true; } @@ -2508,7 +2529,8 @@ bool queue_work_node(int node, struct workqueue_struct *wq, local_irq_save(irq_flags); - if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { + if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)) && + !clear_pending_if_disabled(work)) { int cpu = select_numa_node_cpu(node); __queue_work(cpu, wq, work); @@ -2590,7 +2612,8 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, /* read the comment in __queue_work() */ 
local_irq_save(irq_flags); - if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { + if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)) && + !clear_pending_if_disabled(work)) { __queue_delayed_work(cpu, wq, dwork, delay); ret = true; } @@ -2663,7 +2686,12 @@ bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork) { struct work_struct *work = &rwork->work; - if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { + /* + * rcu_work can't be canceled or disabled. Warn if the user reached + * inside @rwork and disabled the inner work. + */ + if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)) && + !WARN_ON_ONCE(clear_pending_if_disabled(work))) { rwork->wq = wq; call_rcu_hurry(&rwork->rcu, rcu_work_rcufn); return true; @@ -4268,20 +4296,46 @@ bool flush_rcu_work(struct rcu_work *rwork) } EXPORT_SYMBOL(flush_rcu_work); +static void work_offqd_disable(struct work_offq_data *offqd) +{ + const unsigned long max = (1lu << WORK_OFFQ_DISABLE_BITS) - 1; + + if (likely(offqd->disable < max)) + offqd->disable++; + else + WARN_ONCE(true, "workqueue: work disable count overflowed\n"); +} + +static void work_offqd_enable(struct work_offq_data *offqd) +{ + if (likely(offqd->disable > 0)) + offqd->disable--; + else + WARN_ONCE(true, "workqueue: work disable count underflowed\n"); +} + static bool __cancel_work(struct work_struct *work, u32 cflags) { struct work_offq_data offqd; unsigned long irq_flags; int ret; - do { - ret = try_to_grab_pending(work, cflags, &irq_flags); - } while (unlikely(ret == -EAGAIN)); + if (cflags & WORK_CANCEL_DISABLE) { + ret = work_grab_pending(work, cflags, &irq_flags); + } else { + do { + ret = try_to_grab_pending(work, cflags, &irq_flags); + } while (unlikely(ret == -EAGAIN)); - if (unlikely(ret < 0)) - return false; + if (unlikely(ret < 0)) + return false; + } work_offqd_unpack(&offqd, *work_data_bits(work)); + + if (cflags & WORK_CANCEL_DISABLE) + work_offqd_disable(&offqd); + set_work_pool_and_clear_pending(work, offqd.pool_id, work_offqd_pack_flags(&offqd)); local_irq_restore(irq_flags); @@ -4298,6 +4352,10 @@ static bool __cancel_work_sync(struct work_struct *work, u32 cflags) ret = work_grab_pending(work, cflags, &irq_flags); work_offqd_unpack(&offqd, *work_data_bits(work)); + + if (cflags & WORK_CANCEL_DISABLE) + work_offqd_disable(&offqd); + offqd.flags |= WORK_OFFQ_CANCELING; set_work_pool_and_keep_pending(work, offqd.pool_id, work_offqd_pack_flags(&offqd)); @@ -4397,6 +4455,105 @@ bool cancel_delayed_work_sync(struct delayed_work *dwork) } EXPORT_SYMBOL(cancel_delayed_work_sync); +/** + * disable_work - Disable and cancel a work item + * @work: work item to disable + * + * Disable @work by incrementing its disable count and cancel it if currently + * pending. As long as the disable count is non-zero, any attempt to queue @work + * will fail and return %false. The maximum supported disable depth is 2 to the + * power of %WORK_OFFQ_DISABLE_BITS, currently 65536. + * + * Must be called from a sleepable context. Returns %true if @work was pending, + * %false otherwise. + */ +bool disable_work(struct work_struct *work) +{ + return __cancel_work(work, WORK_CANCEL_DISABLE); +} +EXPORT_SYMBOL_GPL(disable_work); + +/** + * disable_work_sync - Disable, cancel and drain a work item + * @work: work item to disable + * + * Similar to disable_work() but also wait for @work to finish if currently + * executing. + * + * Must be called from a sleepable context. 
Returns %true if @work was pending, + * %false otherwise. + */ +bool disable_work_sync(struct work_struct *work) +{ + return __cancel_work_sync(work, WORK_CANCEL_DISABLE); +} +EXPORT_SYMBOL_GPL(disable_work_sync); + +/** + * enable_work - Enable a work item + * @work: work item to enable + * + * Undo disable_work[_sync]() by decrementing @work's disable count. @work can + * only be queued if its disable count is 0. + * + * Must be called from a sleepable context. Returns %true if the disable count + * reached 0. Otherwise, %false. + */ +bool enable_work(struct work_struct *work) +{ + struct work_offq_data offqd; + unsigned long irq_flags; + + work_grab_pending(work, 0, &irq_flags); + + work_offqd_unpack(&offqd, *work_data_bits(work)); + work_offqd_enable(&offqd); + set_work_pool_and_clear_pending(work, offqd.pool_id, + work_offqd_pack_flags(&offqd)); + local_irq_restore(irq_flags); + + return !offqd.disable; +} +EXPORT_SYMBOL_GPL(enable_work); + +/** + * disable_delayed_work - Disable and cancel a delayed work item + * @dwork: delayed work item to disable + * + * disable_work() for delayed work items. + */ +bool disable_delayed_work(struct delayed_work *dwork) +{ + return __cancel_work(&dwork->work, + WORK_CANCEL_DELAYED | WORK_CANCEL_DISABLE); +} +EXPORT_SYMBOL_GPL(disable_delayed_work); + +/** + * disable_delayed_work_sync - Disable, cancel and drain a delayed work item + * @dwork: delayed work item to disable + * + * disable_work_sync() for delayed work items. + */ +bool disable_delayed_work_sync(struct delayed_work *dwork) +{ + return __cancel_work_sync(&dwork->work, + WORK_CANCEL_DELAYED | WORK_CANCEL_DISABLE); +} +EXPORT_SYMBOL_GPL(disable_delayed_work_sync); + +/** + * enable_delayed_work - Enable a delayed work item + * @dwork: delayed work item to enable + * + * enable_work() for delayed work items. + */ +bool enable_delayed_work(struct delayed_work *dwork) +{ + return enable_work(&dwork->work); +} +EXPORT_SYMBOL_GPL(enable_delayed_work); + /** * schedule_on_each_cpu - execute a function synchronously on each online CPU * @func: the function to call -- cgit v1.2.3 From f09b10b6f442656524d2ee26e45966401a14f54b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Mar 2024 07:21:03 -1000 Subject: workqueue: Remove WORK_OFFQ_CANCELING cancel[_delayed]_work_sync() guarantees that it can shut down self-requeueing work items. To achieve that, it grabs and then holds WORK_STRUCT_PENDING bit set while flushing the currently executing instance. As the PENDING bit is set, all queueing attempts including the self-requeueing ones fail and once the currently executing instance is flushed, the work item should be idle as long as someone else isn't actively queueing it. This means that the cancel_work_sync path may hold the PENDING bit set while flushing the target work item. This isn't a problem for the queueing path - it can just fail which is the desired effect. It doesn't affect flush. It doesn't matter to cancel_work either as it can just report that the work item has successfully canceled. However, if there's another cancel_work_sync attempt on the work item, it can't simply fail or report success and that would breach the guarantee that it should provide. cancel_work_sync has to wait for and grab that PENDING bit and go through the motions. WORK_OFFQ_CANCELING and wq_cancel_waitq are what implement this cancel_work_sync to cancel_work_sync wait mechanism. 
When a work item is being canceled, WORK_OFFQ_CANCELING is also set on it
and other cancel_work_sync attempts wait on the bit to be cleared using the
wait queue.

While this works, it's an isolated wart which doesn't jive with the rest of
flush and cancel mechanisms and forces enable_work() and disable_work() to
require a sleepable context, which hampers their usability.

Now that a work item can be disabled, we can use that to block queueing
while cancel_work_sync is in progress. Instead of holding the PENDING bit,
it can temporarily disable the work item, flush and then re-enable it as
that'd achieve the same end result of blocking queueing while canceling and
thus enable canceling of self-requeueing work items.

- WORK_OFFQ_CANCELING and the surrounding mechanisms are removed.

- work_grab_pending() is now simpler, no longer has to wait for a blocking
  operation and thus can be called from any context.

- With work_grab_pending() simplified, no need to use try_to_grab_pending()
  directly. All users are converted to use work_grab_pending().

- __cancel_work_sync() is updated to __cancel_work() with
  WORK_CANCEL_DISABLE to cancel and plug racing queueing attempts. It then
  flushes and re-enables the work item if necessary.

- These changes allow disable_work() and enable_work() to be called from
  any context.

v2: Lai pointed out that mod_delayed_work_on() needs to check the disable
    count before queueing the delayed work item. Added
    clear_pending_if_disabled() call.

Signed-off-by: Tejun Heo
Reviewed-by: Lai Jiangshan
---
 include/linux/workqueue.h | 4 +-
 kernel/workqueue.c | 140 +++++++---------------------------------------
 2 files changed, 20 insertions(+), 124 deletions(-)

(limited to 'include/linux/workqueue.h')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index bd80e66298a0..a5075969931b 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -52,10 +52,9 @@ enum work_bits {
 *
 * MSB
 * [ pool ID ] [ disable depth ] [ OFFQ flags ] [ STRUCT flags ]
- * 16 bits 1 bit 4 or 5 bits
+ * 16 bits 0 bits 4 or 5 bits
 */
 WORK_OFFQ_FLAG_SHIFT = WORK_STRUCT_FLAG_BITS,
- WORK_OFFQ_CANCELING_BIT = WORK_OFFQ_FLAG_SHIFT,
 WORK_OFFQ_FLAG_END,
 WORK_OFFQ_FLAG_BITS = WORK_OFFQ_FLAG_END - WORK_OFFQ_FLAG_SHIFT,
@@ -99,7 +98,6 @@ enum wq_misc_consts {
 };
 /* Convenience constants - of type 'unsigned long', not 'enum'! */
-#define WORK_OFFQ_CANCELING (1ul << WORK_OFFQ_CANCELING_BIT)
 #define WORK_OFFQ_FLAG_MASK (((1ul << WORK_OFFQ_FLAG_BITS) - 1) << WORK_OFFQ_FLAG_SHIFT)
 #define WORK_OFFQ_DISABLE_MASK (((1ul << WORK_OFFQ_DISABLE_BITS) - 1) << WORK_OFFQ_DISABLE_SHIFT)
 #define WORK_OFFQ_POOL_NONE ((1ul << WORK_OFFQ_POOL_BITS) - 1)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 5c53dde877fd..e80a815ec172 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -496,12 +496,6 @@ static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS];
 /* I: attributes used when instantiating ordered pools on demand */
 static struct workqueue_attrs *ordered_wq_attrs[NR_STD_WORKER_POOLS];
-/*
- * Used to synchronize multiple cancel_sync attempts on the same work item. See
- * work_grab_pending() and __cancel_work_sync().
- */
-static DECLARE_WAIT_QUEUE_HEAD(wq_cancel_waitq);
-
 /*
 * I: kthread_worker to release pwq's. pwq release needs to be bounced to a
 * process context while holding a pool lock. Bounce to a dedicated kthread
@@ -783,11 +777,6 @@ static int work_next_color(int color)
 * corresponding to a work.
Pool is available once the work has been * queued anywhere after initialization until it is sync canceled. pwq is * available only while the work item is queued. - * - * %WORK_OFFQ_CANCELING is used to mark a work item which is being - * canceled. While being canceled, a work item may have its PENDING set - * but stay off timer and worklist for arbitrarily long and nobody should - * try to steal the PENDING bit. */ static inline void set_work_data(struct work_struct *work, unsigned long data) { @@ -921,13 +910,6 @@ static unsigned long work_offqd_pack_flags(struct work_offq_data *offqd) ((unsigned long)offqd->flags); } -static bool work_is_canceling(struct work_struct *work) -{ - unsigned long data = atomic_long_read(&work->data); - - return !(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_CANCELING); -} - /* * Policy functions. These define the policies on how the global worker * pools are managed. Unless noted otherwise, these functions assume that @@ -2058,8 +2040,6 @@ out_put: * 1 if @work was pending and we successfully stole PENDING * 0 if @work was idle and we claimed PENDING * -EAGAIN if PENDING couldn't be grabbed at the moment, safe to busy-retry - * -ENOENT if someone else is canceling @work, this state may persist - * for arbitrarily long * ======== ================================================================ * * Note: @@ -2155,26 +2135,9 @@ static int try_to_grab_pending(struct work_struct *work, u32 cflags, fail: rcu_read_unlock(); local_irq_restore(*irq_flags); - if (work_is_canceling(work)) - return -ENOENT; - cpu_relax(); return -EAGAIN; } -struct cwt_wait { - wait_queue_entry_t wait; - struct work_struct *work; -}; - -static int cwt_wakefn(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) -{ - struct cwt_wait *cwait = container_of(wait, struct cwt_wait, wait); - - if (cwait->work != key) - return 0; - return autoremove_wake_function(wait, mode, sync, key); -} - /** * work_grab_pending - steal work item from worklist and disable irq * @work: work item to steal @@ -2184,7 +2147,7 @@ static int cwt_wakefn(wait_queue_entry_t *wait, unsigned mode, int sync, void *k * Grab PENDING bit of @work. @work can be in any stable state - idle, on timer * or on worklist. * - * Must be called in process context. IRQ is disabled on return with IRQ state + * Can be called from any context. IRQ is disabled on return with IRQ state * stored in *@irq_flags. The caller is responsible for re-enabling it using * local_irq_restore(). * @@ -2193,41 +2156,14 @@ static int cwt_wakefn(wait_queue_entry_t *wait, unsigned mode, int sync, void *k static bool work_grab_pending(struct work_struct *work, u32 cflags, unsigned long *irq_flags) { - struct cwt_wait cwait; int ret; - might_sleep(); -repeat: - ret = try_to_grab_pending(work, cflags, irq_flags); - if (likely(ret >= 0)) - return ret; - if (ret != -ENOENT) - goto repeat; - - /* - * Someone is already canceling. Wait for it to finish. flush_work() - * doesn't work for PREEMPT_NONE because we may get woken up between - * @work's completion and the other canceling task resuming and clearing - * CANCELING - flush_work() will return false immediately as @work is no - * longer busy, try_to_grab_pending() will return -ENOENT as @work is - * still being canceled and the other canceling task won't be able to - * clear CANCELING as we're hogging the CPU. - * - * Let's wait for completion using a waitqueue. As this may lead to the - * thundering herd problem, use a custom wake function which matches - * @work along with exclusive wait and wakeup. 
- */ - init_wait(&cwait.wait); - cwait.wait.func = cwt_wakefn; - cwait.work = work; - - prepare_to_wait_exclusive(&wq_cancel_waitq, &cwait.wait, - TASK_UNINTERRUPTIBLE); - if (work_is_canceling(work)) - schedule(); - finish_wait(&wq_cancel_waitq, &cwait.wait); - - goto repeat; + while (true) { + ret = try_to_grab_pending(work, cflags, irq_flags); + if (ret >= 0) + return ret; + cpu_relax(); + } } /** @@ -2645,19 +2581,14 @@ bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq, struct delayed_work *dwork, unsigned long delay) { unsigned long irq_flags; - int ret; + bool ret; - do { - ret = try_to_grab_pending(&dwork->work, WORK_CANCEL_DELAYED, - &irq_flags); - } while (unlikely(ret == -EAGAIN)); + ret = work_grab_pending(&dwork->work, WORK_CANCEL_DELAYED, &irq_flags); - if (likely(ret >= 0)) { + if (!clear_pending_if_disabled(&dwork->work)) __queue_delayed_work(cpu, wq, dwork, delay); - local_irq_restore(irq_flags); - } - /* -ENOENT from try_to_grab_pending() becomes %true */ + local_irq_restore(irq_flags); return ret; } EXPORT_SYMBOL_GPL(mod_delayed_work_on); @@ -4320,16 +4251,7 @@ static bool __cancel_work(struct work_struct *work, u32 cflags) unsigned long irq_flags; int ret; - if (cflags & WORK_CANCEL_DISABLE) { - ret = work_grab_pending(work, cflags, &irq_flags); - } else { - do { - ret = try_to_grab_pending(work, cflags, &irq_flags); - } while (unlikely(ret == -EAGAIN)); - - if (unlikely(ret < 0)) - return false; - } + ret = work_grab_pending(work, cflags, &irq_flags); work_offqd_unpack(&offqd, *work_data_bits(work)); @@ -4344,22 +4266,9 @@ static bool __cancel_work(struct work_struct *work, u32 cflags) static bool __cancel_work_sync(struct work_struct *work, u32 cflags) { - struct work_offq_data offqd; - unsigned long irq_flags; bool ret; - /* claim @work and tell other tasks trying to grab @work to back off */ - ret = work_grab_pending(work, cflags, &irq_flags); - - work_offqd_unpack(&offqd, *work_data_bits(work)); - - if (cflags & WORK_CANCEL_DISABLE) - work_offqd_disable(&offqd); - - offqd.flags |= WORK_OFFQ_CANCELING; - set_work_pool_and_keep_pending(work, offqd.pool_id, - work_offqd_pack_flags(&offqd)); - local_irq_restore(irq_flags); + ret = __cancel_work(work, cflags | WORK_CANCEL_DISABLE); /* * Skip __flush_work() during early boot when we know that @work isn't @@ -4368,19 +4277,8 @@ static bool __cancel_work_sync(struct work_struct *work, u32 cflags) if (wq_online) __flush_work(work, true); - work_offqd_unpack(&offqd, *work_data_bits(work)); - - /* - * smp_mb() at the end of set_work_pool_and_clear_pending() is paired - * with prepare_to_wait() above so that either waitqueue_active() is - * visible here or !work_is_canceling() is visible there. - */ - offqd.flags &= ~WORK_OFFQ_CANCELING; - set_work_pool_and_clear_pending(work, WORK_OFFQ_POOL_NONE, - work_offqd_pack_flags(&offqd)); - - if (waitqueue_active(&wq_cancel_waitq)) - __wake_up(&wq_cancel_waitq, TASK_NORMAL, 1, work); + if (!(cflags & WORK_CANCEL_DISABLE)) + enable_work(work); return ret; } @@ -4464,8 +4362,8 @@ EXPORT_SYMBOL(cancel_delayed_work_sync); * will fail and return %false. The maximum supported disable depth is 2 to the * power of %WORK_OFFQ_DISABLE_BITS, currently 65536. * - * Must be called from a sleepable context. Returns %true if @work was pending, - * %false otherwise. + * Can be called from any context. Returns %true if @work was pending, %false + * otherwise. 
*/ bool disable_work(struct work_struct *work) { @@ -4496,8 +4394,8 @@ EXPORT_SYMBOL_GPL(disable_work_sync); * Undo disable_work[_sync]() by decrementing @work's disable count. @work can * only be queued if its disable count is 0. * - * Must be called from a sleepable context. Returns %true if the disable count - * reached 0. Otherwise, %false. + * Can be called from any context. Returns %true if the disable count reached 0. + * Otherwise, %false. */ bool enable_work(struct work_struct *work) { -- cgit v1.2.3 From 456a78eef2670d0e9521e87f35a056de8fec7fb2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Mar 2024 07:21:03 -1000 Subject: workqueue: Remember whether a work item was on a BH workqueue Add an off-queue flag, WORK_OFFQ_BH, that indicates whether the last workqueue the work item was on was a BH one. This will be used to test whether a work item is BH in cancel_sync path to implement atomic cancel_sync'ing for BH work items. Signed-off-by: Tejun Heo Reviewed-by: Lai Jiangshan --- include/linux/workqueue.h | 4 +++- kernel/workqueue.c | 10 ++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index a5075969931b..777b0186317e 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -52,9 +52,10 @@ enum work_bits { * * MSB * [ pool ID ] [ disable depth ] [ OFFQ flags ] [ STRUCT flags ] - * 16 bits 0 bits 4 or 5 bits + * 16 bits 1 bit 4 or 5 bits */ WORK_OFFQ_FLAG_SHIFT = WORK_STRUCT_FLAG_BITS, + WORK_OFFQ_BH_BIT = WORK_OFFQ_FLAG_SHIFT, WORK_OFFQ_FLAG_END, WORK_OFFQ_FLAG_BITS = WORK_OFFQ_FLAG_END - WORK_OFFQ_FLAG_SHIFT, @@ -98,6 +99,7 @@ enum wq_misc_consts { }; /* Convenience constants - of type 'unsigned long', not 'enum'! */ +#define WORK_OFFQ_BH (1ul << WORK_OFFQ_BH_BIT) #define WORK_OFFQ_FLAG_MASK (((1ul << WORK_OFFQ_FLAG_BITS) - 1) << WORK_OFFQ_FLAG_SHIFT) #define WORK_OFFQ_DISABLE_MASK (((1ul << WORK_OFFQ_DISABLE_BITS) - 1) << WORK_OFFQ_DISABLE_SHIFT) #define WORK_OFFQ_POOL_NONE ((1ul << WORK_OFFQ_POOL_BITS) - 1) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index e80a815ec172..baf7495338bc 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -764,6 +764,11 @@ static int work_next_color(int color) return (color + 1) % WORK_NR_COLORS; } +static unsigned long pool_offq_flags(struct worker_pool *pool) +{ + return (pool->flags & POOL_BH) ? WORK_OFFQ_BH : 0; +} + /* * While queued, %WORK_STRUCT_PWQ is set and non flag bits of a work's data * contain the pointer to the queued pwq. Once execution starts, the flag @@ -2122,7 +2127,8 @@ static int try_to_grab_pending(struct work_struct *work, u32 cflags, * this destroys work->data needed by the next step, stash it. */ work_data = *work_data_bits(work); - set_work_pool_and_keep_pending(work, pool->id, 0); + set_work_pool_and_keep_pending(work, pool->id, + pool_offq_flags(pool)); /* must be the last step, see the function comment */ pwq_dec_nr_in_flight(pwq, work_data); @@ -3175,7 +3181,7 @@ __acquires(&pool->lock) * PENDING and queued state changes happen together while IRQ is * disabled. 
*/ - set_work_pool_and_clear_pending(work, pool->id, 0); + set_work_pool_and_clear_pending(work, pool->id, pool_offq_flags(pool)); pwq->stats[PWQ_STAT_STARTED]++; raw_spin_unlock_irq(&pool->lock); -- cgit v1.2.3 From ae1296a7bfe4f8e446677ccb761d9419926557bc Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Fri, 8 Mar 2024 17:42:52 +0800 Subject: workqueue: Move attrs->cpumask out of worker_pool's properties when attrs->affn_strict Allow more pools can be shared when attrs->affn_strict. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 3 +++ kernel/workqueue.c | 13 ++++++++----- 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 777b0186317e..bfcf8d38f4b1 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -185,6 +185,9 @@ struct workqueue_attrs { * Below fields aren't properties of a worker_pool. They only modify how * :c:func:`apply_workqueue_attrs` select pools and thus don't * participate in pool hash calculations or equality comparisons. + * + * If @affn_strict is set, @cpumask isn't a property of a worker_pool + * either. */ /** diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 45d2aae73c96..f03960f094fa 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4625,6 +4625,8 @@ static void wqattrs_clear_for_pool(struct workqueue_attrs *attrs) { attrs->affn_scope = WQ_AFFN_NR_TYPES; attrs->ordered = false; + if (attrs->affn_strict) + cpumask_copy(attrs->cpumask, cpu_possible_mask); } /* hash value of the content of @attr */ @@ -4633,11 +4635,12 @@ static u32 wqattrs_hash(const struct workqueue_attrs *attrs) u32 hash = 0; hash = jhash_1word(attrs->nice, hash); - hash = jhash(cpumask_bits(attrs->cpumask), - BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long), hash); + hash = jhash_1word(attrs->affn_strict, hash); hash = jhash(cpumask_bits(attrs->__pod_cpumask), BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long), hash); - hash = jhash_1word(attrs->affn_strict, hash); + if (!attrs->affn_strict) + hash = jhash(cpumask_bits(attrs->cpumask), + BITS_TO_LONGS(nr_cpumask_bits) * sizeof(long), hash); return hash; } @@ -4647,11 +4650,11 @@ static bool wqattrs_equal(const struct workqueue_attrs *a, { if (a->nice != b->nice) return false; - if (!cpumask_equal(a->cpumask, b->cpumask)) + if (a->affn_strict != b->affn_strict) return false; if (!cpumask_equal(a->__pod_cpumask, b->__pod_cpumask)) return false; - if (a->affn_strict != b->affn_strict) + if (!a->affn_strict && !cpumask_equal(a->cpumask, b->cpumask)) return false; return true; } -- cgit v1.2.3 From 474a549ff4c989427a14fdab851e562c8a63fe24 Mon Sep 17 00:00:00 2001 From: Allen Pais Date: Mon, 25 Mar 2024 18:02:01 +0000 Subject: workqueue: Introduce enable_and_queue_work() convenience function The enable_and_queue_work() function is introduced to streamline the process of enabling and queuing a work item on a specific workqueue. This function combines the functionalities of enable_work() and queue_work() in a single call, providing a concise and convenient API for enabling and queuing work items. The function accepts a target workqueue and a work item as parameters. It first attempts to enable the work item using enable_work(). A successful enable operation means that the work item was previously disabled and is now marked as eligible for execution. If the enable operation is successful, the work item is then queued on the specified workqueue using queue_work(). 
The function returns true if the work item was successfully enabled and queued, and false otherwise. Note: This function may lead to unnecessary spurious wake-ups in cases where the work item is expected to be dormant but enable/disable are called frequently. Spurious wake-ups refer to the condition where worker threads are woken up without actual work to be done. Callers should be aware of this behavior and may need to employ additional synchronization mechanisms to avoid these overheads if such wake-ups are not desired. This addition aims to enhance code readability and maintainability by providing a unified interface for the common use case of enabling and queuing work items on a workqueue. tj: Made the function comment more compact. Signed-off-by: Allen Pais Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index bfcf8d38f4b1..2df1188c0f96 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -682,6 +682,32 @@ static inline bool schedule_work(struct work_struct *work) return queue_work(system_wq, work); } +/** + * enable_and_queue_work - Enable and queue a work item on a specific workqueue + * @wq: The target workqueue + * @work: The work item to be enabled and queued + * + * This function combines the operations of enable_work() and queue_work(), + * providing a convenient way to enable and queue a work item in a single call. + * It invokes enable_work() on @work and then queues it if the disable depth + * reached 0. Returns %true if the disable depth reached 0 and @work is queued, + * and %false otherwise. + * + * Note that @work is always queued when disable depth reaches zero. If the + * desired behavior is queueing only if certain events took place while @work is + * disabled, the user should implement the necessary state tracking and perform + * explicit conditional queueing after enable_work(). + */ +static inline bool enable_and_queue_work(struct workqueue_struct *wq, + struct work_struct *work) +{ + if (enable_work(work)) { + queue_work(wq, work); + return true; + } + return false; +} + /* * Detect attempt to flush system-wide workqueues at compile time when possible. * Warn attempt to flush system-wide workqueues at runtime. -- cgit v1.2.3 From 51da7f68edae38e81543d57fd71811f7481c0472 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 22 Apr 2024 10:03:13 -1000 Subject: workqueue: Use "@..." in function comment to describe variable length argument Previously, it was using "remaining args" without leading "@" which isn't valid. Let's follow snprintf()'s example and use "@...". Signed-off-by: Tejun Heo Reported-by: Stephen Rothwell --- include/linux/workqueue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 2df1188c0f96..fb3993894536 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -473,7 +473,7 @@ void workqueue_softirq_dead(unsigned int cpu); * @fmt: printf format for the name of the workqueue * @flags: WQ_* flags * @max_active: max in-flight work items, 0 for default - * remaining args: args for @fmt + * @...: args for @fmt * * For a per-cpu workqueue, @max_active limits the number of in-flight work * items for each CPU. e.g. 
@max_active of 1 indicates that each CPU can be -- cgit v1.2.3 From 231035f18d6b80e5c28732a20872398116a54ecd Mon Sep 17 00:00:00 2001 From: Wenchao Hao Date: Thu, 6 Jun 2024 16:52:15 +0800 Subject: workqueue: Increase worker desc's length to 32 Commit 31c89007285d ("workqueue.c: Increase workqueue name length") increased WQ_NAME_LEN from 24 to 32, but forget to increase WORKER_DESC_LEN, which would cause truncation when setting kworker's desc from workqueue_struct's name, process_one_work() for example. Fixes: 31c89007285d ("workqueue.c: Increase workqueue name length") Signed-off-by: Wenchao Hao CC: Audra Mitchell Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index fb3993894536..d9968bfc8eac 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -95,7 +95,7 @@ enum wq_misc_consts { WORK_BUSY_RUNNING = 1 << 1, /* maximum string length for set_worker_desc() */ - WORKER_DESC_LEN = 24, + WORKER_DESC_LEN = 32, }; /* Convenience constants - of type 'unsigned long', not 'enum'! */ -- cgit v1.2.3 From e1b6705bcfb2797ea182e313d5ec4f57fa8571f2 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Mon, 27 May 2024 17:56:48 -0700 Subject: cpumask: make core headers including cpumask_types.h where possible Now that cpumask types are split out to a separate smaller header, many frequently included core headers may switch to using it. Link: https://lkml.kernel.org/r/20240528005648.182376-7-yury.norov@gmail.com Signed-off-by: Yury Norov Cc: Amit Daniel Kachhap Cc: Anna-Maria Behnsen Cc: Christoph Lameter Cc: Daniel Lezcano Cc: Dennis Zhou Cc: Frederic Weisbecker Cc: Johannes Weiner Cc: Juri Lelli Cc: Kees Cook Cc: Mathieu Desnoyers Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Rasmus Villemoes Cc: Tejun Heo Cc: Thomas Gleixner Cc: Ulf Hansson Cc: Vincent Guittot Cc: Viresh Kumar Cc: Yury Norov Signed-off-by: Andrew Morton --- include/linux/cacheinfo.h | 2 +- include/linux/clockchips.h | 2 +- include/linux/cpu_rmap.h | 2 +- include/linux/interrupt.h | 2 +- include/linux/irqchip/irq-partition-percpu.h | 2 +- include/linux/msi.h | 2 +- include/linux/pm_domain.h | 2 +- include/linux/stop_machine.h | 2 +- include/linux/torture.h | 2 +- include/linux/workqueue.h | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 2cb15fe4fe12..286db104e054 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -3,7 +3,7 @@ #define _LINUX_CACHEINFO_H #include -#include +#include #include struct device_node; diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 9aac31d856f3..b0df28ddd394 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -12,7 +12,7 @@ #ifdef CONFIG_GENERIC_CLOCKEVENTS # include -# include +# include # include # include diff --git a/include/linux/cpu_rmap.h b/include/linux/cpu_rmap.h index cae324d10965..20b5729903d7 100644 --- a/include/linux/cpu_rmap.h +++ b/include/linux/cpu_rmap.h @@ -7,7 +7,7 @@ * Copyright 2011 Solarflare Communications Inc. 
*/ -#include +#include #include #include #include diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 5c9bdd3ffccc..136a55455529 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -5,13 +5,13 @@ #include #include -#include #include #include #include #include #include #include +#include #include #include diff --git a/include/linux/irqchip/irq-partition-percpu.h b/include/linux/irqchip/irq-partition-percpu.h index 2f6ae7551748..b35ee22c278f 100644 --- a/include/linux/irqchip/irq-partition-percpu.h +++ b/include/linux/irqchip/irq-partition-percpu.h @@ -8,7 +8,7 @@ #define __LINUX_IRQCHIP_IRQ_PARTITION_PERCPU_H #include -#include +#include #include struct partition_affinity { diff --git a/include/linux/msi.h b/include/linux/msi.h index dc27cf3903d5..26588da88bdd 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -19,7 +19,7 @@ */ #include -#include +#include #include #include #include diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index f24546a3d3db..71e4f0fb8867 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include /* diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index ea7a74ea7389..3132262a404d 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -3,7 +3,7 @@ #define _LINUX_STOP_MACHINE #include -#include +#include #include #include diff --git a/include/linux/torture.h b/include/linux/torture.h index 1541454da03e..c2e979f82f8d 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index fb3993894536..52496f07fba5 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include -- cgit v1.2.3 From ec0a7d44b358afaaf52856d03c72e20587bc888b Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 9 Aug 2024 15:28:25 -0700 Subject: workqueue: Add interface for user-defined workqueue lockdep map Add an interface for a user-defined workqueue lockdep map, which is helpful when multiple workqueues are created for the same purpose. This also helps avoid leaking lockdep maps on each workqueue creation. 
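As an illustrative aside (not part of the patch itself), a driver could use
the new interface to share a single lockdep map across a family of
workqueues created for the same purpose. This is only a sketch assuming
CONFIG_LOCKDEP; the foo_* names, fields and format string are hypothetical.

	#include <linux/lockdep.h>
	#include <linux/workqueue.h>

	/* One lockdep class shared by every per-device reset workqueue. */
	static struct lock_class_key foo_reset_wq_key;
	static struct lockdep_map foo_reset_wq_lockdep_map =
		STATIC_LOCKDEP_MAP_INIT("foo_reset_wq", &foo_reset_wq_key);

	struct foo_device {
		int id;
		struct workqueue_struct *reset_wq;
	};

	static int foo_device_init_wq(struct foo_device *foo)
	{
		/* Ordered, reclaim-safe wq; all instances share one lockdep map. */
		foo->reset_wq = alloc_ordered_workqueue_lockdep_map(
				"foo_reset_wq_%d", WQ_MEM_RECLAIM,
				&foo_reset_wq_lockdep_map, foo->id);
		return foo->reset_wq ? 0 : -ENOMEM;
	}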
v2: - Add alloc_workqueue_lockdep_map (Tejun) v3: - Drop __WQ_USER_OWNED_LOCKDEP (Tejun) - static inline alloc_ordered_workqueue_lockdep_map (Tejun) Cc: Tejun Heo Cc: Lai Jiangshan Signed-off-by: Matthew Brost Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 52 +++++++++++++++++++++++++++++++++++++++++++++++ kernel/workqueue.c | 28 +++++++++++++++++++++++++ 2 files changed, 80 insertions(+) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 4eb8f9563136..8ccbf510880b 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -507,6 +507,58 @@ void workqueue_softirq_dead(unsigned int cpu); __printf(1, 4) struct workqueue_struct * alloc_workqueue(const char *fmt, unsigned int flags, int max_active, ...); +#ifdef CONFIG_LOCKDEP +/** + * alloc_workqueue_lockdep_map - allocate a workqueue with user-defined lockdep_map + * @fmt: printf format for the name of the workqueue + * @flags: WQ_* flags + * @max_active: max in-flight work items, 0 for default + * @lockdep_map: user-defined lockdep_map + * @...: args for @fmt + * + * Same as alloc_workqueue but with the a user-define lockdep_map. Useful for + * workqueues created with the same purpose and to avoid leaking a lockdep_map + * on each workqueue creation. + * + * RETURNS: + * Pointer to the allocated workqueue on success, %NULL on failure. + */ +__printf(1, 5) struct workqueue_struct * +alloc_workqueue_lockdep_map(const char *fmt, unsigned int flags, int max_active, + struct lockdep_map *lockdep_map, ...); + +/** + * alloc_ordered_workqueue_lockdep_map - allocate an ordered workqueue with + * user-defined lockdep_map + * + * @fmt: printf format for the name of the workqueue + * @flags: WQ_* flags (only WQ_FREEZABLE and WQ_MEM_RECLAIM are meaningful) + * @lockdep_map: user-defined lockdep_map + * @args: args for @fmt + * + * Same as alloc_ordered_workqueue but with the a user-define lockdep_map. + * Useful for workqueues created with the same purpose and to avoid leaking a + * lockdep_map on each workqueue creation. + * + * RETURNS: + * Pointer to the allocated workqueue on success, %NULL on failure. + */ +__printf(1, 4) static inline struct workqueue_struct * +alloc_ordered_workqueue_lockdep_map(const char *fmt, unsigned int flags, + struct lockdep_map *lockdep_map, ...) +{ + struct workqueue_struct *wq; + va_list args; + + va_start(args, lockdep_map); + wq = alloc_workqueue_lockdep_map(fmt, WQ_UNBOUND | __WQ_ORDERED | flags, + 1, lockdep_map, args); + va_end(args); + + return wq; +} +#endif + /** * alloc_ordered_workqueue - allocate an ordered workqueue * @fmt: printf format for the name of the workqueue diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 4c8ba5c4229f..7468ba5e2417 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4775,11 +4775,17 @@ static void wq_init_lockdep(struct workqueue_struct *wq) static void wq_unregister_lockdep(struct workqueue_struct *wq) { + if (wq->lockdep_map != &wq->__lockdep_map) + return; + lockdep_unregister_key(&wq->key); } static void wq_free_lockdep(struct workqueue_struct *wq) { + if (wq->lockdep_map != &wq->__lockdep_map) + return; + if (wq->lock_name != wq->name) kfree(wq->lock_name); } @@ -5756,6 +5762,28 @@ struct workqueue_struct *alloc_workqueue(const char *fmt, } EXPORT_SYMBOL_GPL(alloc_workqueue); +#ifdef CONFIG_LOCKDEP +__printf(1, 5) +struct workqueue_struct * +alloc_workqueue_lockdep_map(const char *fmt, unsigned int flags, + int max_active, struct lockdep_map *lockdep_map, ...) 
+{ + struct workqueue_struct *wq; + va_list args; + + va_start(args, lockdep_map); + wq = __alloc_workqueue(fmt, flags, max_active, args); + va_end(args); + if (!wq) + return NULL; + + wq->lockdep_map = lockdep_map; + + return wq; +} +EXPORT_SYMBOL_GPL(alloc_workqueue_lockdep_map); +#endif + static bool pwq_busy(struct pool_workqueue *pwq) { int i; -- cgit v1.2.3 From 8dffaec34dd55473adcbc924a4c9b04aaa0d4278 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 19 Aug 2024 12:18:23 -1000 Subject: workqueue: Fix htmldocs build warning Fix htmldocs build warning introduced by ec0a7d44b358 ("workqueue: Add interface for user-defined workqueue lockdep map"). Signed-off-by: Tejun Heo Reported-by: Stephen Rothwell Cc: Matthew Brost --- include/linux/workqueue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 8ccbf510880b..04dff07a9e2b 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -534,7 +534,7 @@ alloc_workqueue_lockdep_map(const char *fmt, unsigned int flags, int max_active, * @fmt: printf format for the name of the workqueue * @flags: WQ_* flags (only WQ_FREEZABLE and WQ_MEM_RECLAIM are meaningful) * @lockdep_map: user-defined lockdep_map - * @args: args for @fmt + * @...: args for @fmt * * Same as alloc_ordered_workqueue but with the a user-define lockdep_map. * Useful for workqueues created with the same purpose and to avoid leaking a -- cgit v1.2.3 From 9b59a85a84dc37ca4f2c54df5e06aff4c1eae5d3 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Tue, 20 Aug 2024 12:38:08 -0700 Subject: workqueue: Don't call va_start / va_end twice Calling va_start / va_end multiple times is undefined and causes problems with certain compiler / platforms. Change alloc_ordered_workqueue_lockdep_map to a macro and updated __alloc_workqueue to take a va_list argument. Cc: Sergey Senozhatsky Cc: Tejun Heo Cc: Lai Jiangshan Signed-off-by: Matthew Brost Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 16 ++-------------- kernel/workqueue.c | 6 +----- 2 files changed, 3 insertions(+), 19 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 04dff07a9e2b..f3cc15940b4d 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -543,20 +543,8 @@ alloc_workqueue_lockdep_map(const char *fmt, unsigned int flags, int max_active, * RETURNS: * Pointer to the allocated workqueue on success, %NULL on failure. */ -__printf(1, 4) static inline struct workqueue_struct * -alloc_ordered_workqueue_lockdep_map(const char *fmt, unsigned int flags, - struct lockdep_map *lockdep_map, ...) -{ - struct workqueue_struct *wq; - va_list args; - - va_start(args, lockdep_map); - wq = alloc_workqueue_lockdep_map(fmt, WQ_UNBOUND | __WQ_ORDERED | flags, - 1, lockdep_map, args); - va_end(args); - - return wq; -} +#define alloc_ordered_workqueue_lockdep_map(fmt, flags, lockdep_map, args...) \ + alloc_workqueue_lockdep_map(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, lockdep_map, ##args) #endif /** diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 7468ba5e2417..7cc77adb18bb 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -5619,12 +5619,10 @@ static void wq_adjust_max_active(struct workqueue_struct *wq) } while (activated); } -__printf(1, 4) static struct workqueue_struct *__alloc_workqueue(const char *fmt, unsigned int flags, - int max_active, ...) 
+ int max_active, va_list args)
 {
- va_list args;
 struct workqueue_struct *wq;
 size_t wq_size;
 int name_len;
@@ -5656,9 +5654,7 @@ static struct workqueue_struct *__alloc_workqueue(const char *fmt,
 goto err_free_wq;
 }
- va_start(args, max_active);
 name_len = vsnprintf(wq->name, sizeof(wq->name), fmt, args);
- va_end(args);
 if (name_len >= WQ_NAME_LEN)
 pr_warn_once("workqueue: name exceeds WQ_NAME_LEN. Truncating to: %s\n",
-- cgit v1.2.3

From d156263e247c334a8872578118e0709ed63c4806 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Wed, 21 Aug 2024 06:37:39 -1000
Subject: workqueue: Fix another htmldocs build warning

Fix htmldocs build warning introduced by 9b59a85a84dc ("workqueue: Don't
call va_start / va_end twice").

Signed-off-by: Tejun Heo
Reported-by: Stephen Rothwell
Cc: Matthew Brost
---
 include/linux/workqueue.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux/workqueue.h')

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index f3cc15940b4d..59c2695e12e7 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -534,7 +534,7 @@ alloc_workqueue_lockdep_map(const char *fmt, unsigned int flags, int max_active,
 * @fmt: printf format for the name of the workqueue
 * @flags: WQ_* flags (only WQ_FREEZABLE and WQ_MEM_RECLAIM are meaningful)
 * @lockdep_map: user-defined lockdep_map
- * @...: args for @fmt
+ * @args: args for @fmt
 *
 * Same as alloc_ordered_workqueue but with the a user-define lockdep_map.
 * Useful for workqueues created with the same purpose and to avoid leaking a
@@ -544,7 +544,8 @@ alloc_workqueue_lockdep_map(const char *fmt, unsigned int flags, int max_active,
 * Pointer to the allocated workqueue on success, %NULL on failure.
 */
 #define alloc_ordered_workqueue_lockdep_map(fmt, flags, lockdep_map, args...) \
- alloc_workqueue_lockdep_map(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, lockdep_map, ##args)
+ alloc_workqueue_lockdep_map(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), \
+ 1, lockdep_map, ##args)
 #endif
-- cgit v1.2.3

From 581434654e01ec79dd02c21448ac84e2ce2d1a64 Mon Sep 17 00:00:00 2001
From: Chen Ridong
Date: Tue, 8 Oct 2024 11:24:58 +0000
Subject: workqueue: Adjust WQ_MAX_ACTIVE from 512 to 2048

WQ_MAX_ACTIVE is currently set to 512, which was established approximately
15 years ago. However, with the significant increase in machine sizes and
capabilities, the previous limit of 256 concurrent tasks is no longer
sufficient. Therefore, we propose to increase WQ_MAX_ACTIVE to 2048, and
WQ_DFL_ACTIVE is now 1024.

Signed-off-by: Chen Ridong
Signed-off-by: Tejun Heo
---
 Documentation/core-api/workqueue.rst | 4 ++--
 include/linux/workqueue.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux/workqueue.h')

diff --git a/Documentation/core-api/workqueue.rst b/Documentation/core-api/workqueue.rst
index 2b813f80ce39..e295835fc116 100644
--- a/Documentation/core-api/workqueue.rst
+++ b/Documentation/core-api/workqueue.rst
@@ -245,8 +245,8 @@ CPU which can be assigned to the work items of a wq. For example, with
 at the same time per CPU. This is always a per-CPU attribute, even for
 unbound workqueues.
-The maximum limit for ``@max_active`` is 512 and the default value used
-when 0 is specified is 256. These values are chosen sufficiently high
+The maximum limit for ``@max_active`` is 2048 and the default value used
+when 0 is specified is 1024. These values are chosen sufficiently high
 such that they are not the limiting factor while providing protection in
 runaway cases.
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 59c2695e12e7..b0dc957c3e56 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -412,7 +412,7 @@ enum wq_flags { }; enum wq_consts { - WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ + WQ_MAX_ACTIVE = 2048, /* I like 2048, better ideas? */ WQ_UNBOUND_MAX_ACTIVE = WQ_MAX_ACTIVE, WQ_DFL_ACTIVE = WQ_MAX_ACTIVE / 2, -- cgit v1.2.3
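As a closing illustrative aside (not part of the patch), the change raises
both the clamp applied to explicit @max_active values and the default used
when 0 is passed. A minimal sketch, with the "foo" names being hypothetical:

	#include <linux/module.h>
	#include <linux/workqueue.h>

	static struct workqueue_struct *foo_wq;

	static int __init foo_init(void)
	{
		/* 0 selects the default, WQ_DFL_ACTIVE, i.e. 1024 after this patch. */
		foo_wq = alloc_workqueue("foo", WQ_UNBOUND, 0);
		if (!foo_wq)
			return -ENOMEM;

		/* An explicit value above WQ_MAX_ACTIVE (now 2048) would be clamped. */
		return 0;
	}

	static void __exit foo_exit(void)
	{
		destroy_workqueue(foo_wq);
	}

	module_init(foo_init);
	module_exit(foo_exit);
	MODULE_LICENSE("GPL");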