Diffstat (limited to 'kernel/workqueue.c')
-rw-r--r--  kernel/workqueue.c  185
1 file changed, 173 insertions, 12 deletions
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 392be4b252f6..7abbeed13421 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -259,6 +259,8 @@ struct workqueue_struct {
struct wq_device *wq_dev; /* I: for sysfs interface */
#endif
#ifdef CONFIG_LOCKDEP
+ char *lock_name;
+ struct lock_class_key key;
struct lockdep_map lockdep_map;
#endif
char name[WQ_NAME_LEN]; /* I: workqueue name */
@@ -646,7 +648,7 @@ static void set_work_pool_and_clear_pending(struct work_struct *work,
* The following mb guarantees that previous clear of a PENDING bit
* will not be reordered with any speculative LOADS or STORES from
* work->current_func, which is executed afterwards. This possible
- * reordering can lead to a missed execution on attempt to qeueue
+ * reordering can lead to a missed execution on attempt to queue
* the same @work. E.g. consider this case:
*
* CPU#0 CPU#1
@@ -910,6 +912,36 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task)
}
/**
+ * wq_worker_last_func - retrieve worker's last work function
+ * @task: Task to retrieve last work function of.
+ *
+ * Determine the last function a worker executed. This is called from
+ * the scheduler to get a worker's last known identity.
+ *
+ * CONTEXT:
+ * spin_lock_irq(rq->lock)
+ *
+ * This function is called during schedule() when a kworker is going
+ * to sleep. It's used by psi to identify aggregation workers during
+ * dequeuing, to allow periodic aggregation to shut off when that
+ * worker is the last task in the system or cgroup to go to sleep.
+ *
+ * As this function doesn't involve any workqueue-related locking, it
+ * only returns stable values when called from inside the scheduler's
+ * queuing and dequeuing paths, when @task, which must be a kworker,
+ * is guaranteed to not be processing any works.
+ *
+ * Return:
+ * The last work function %current executed as a worker, NULL if it
+ * hasn't executed any work yet.
+ */
+work_func_t wq_worker_last_func(struct task_struct *task)
+{
+ struct worker *worker = kthread_data(task);
+
+ return worker->last_func;
+}
+
+/**
* worker_set_flags - set worker flags and adjust nr_running accordingly
* @worker: self
* @flags: flags to set
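
The new helper exists purely for the scheduler side; a minimal sketch of the kind of check it enables follows, assuming the psi aggregator is the psi_avgs_work() function in kernel/sched/psi.c (the real call site lives in the companion psi change, and psi_is_aggregator_sleeping() is a made-up helper name):

/* Sketch, not part of this patch: is the kworker that is about to
 * sleep the one that last ran psi's periodic aggregation work?
 */
static bool psi_is_aggregator_sleeping(struct task_struct *task)
{
	return (task->flags & PF_WQ_WORKER) &&
	       wq_worker_last_func(task) == psi_avgs_work;
}
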
@@ -1321,7 +1353,7 @@ static bool is_chained_work(struct workqueue_struct *wq)
worker = current_wq_worker();
/*
- * Return %true iff I'm a worker execuing a work item on @wq. If
+ * Return %true iff I'm a worker executing a work item on @wq. If
* I'm @worker, it's safe to dereference it without locking.
*/
return worker && worker->current_pwq->wq == wq;
@@ -1492,6 +1524,90 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq,
}
EXPORT_SYMBOL(queue_work_on);
+/**
+ * workqueue_select_cpu_near - Select a CPU based on NUMA node
+ * @node: NUMA node ID that we want to select a CPU from
+ *
+ * This function will attempt to find a "random" cpu available on a given
+ * node. If there are no CPUs available on the given node it will return
+ * WORK_CPU_UNBOUND indicating that we should just schedule to any
+ * available CPU if we need to schedule this work.
+ */
+static int workqueue_select_cpu_near(int node)
+{
+ int cpu;
+
+ /* No point in doing this if NUMA isn't enabled for workqueues */
+ if (!wq_numa_enabled)
+ return WORK_CPU_UNBOUND;
+
+ /* Delay binding to CPU if node is not valid or online */
+ if (node < 0 || node >= MAX_NUMNODES || !node_online(node))
+ return WORK_CPU_UNBOUND;
+
+ /* Use local node/cpu if we are already there */
+ cpu = raw_smp_processor_id();
+ if (node == cpu_to_node(cpu))
+ return cpu;
+
+ /* Use "random" otherwise know as "first" online CPU of node */
+ cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask);
+
+ /* If CPU is valid return that, otherwise just defer */
+ return cpu < nr_cpu_ids ? cpu : WORK_CPU_UNBOUND;
+}
+
+/**
+ * queue_work_node - queue work on a "random" cpu for a given NUMA node
+ * @node: NUMA node that we are targeting the work for
+ * @wq: workqueue to use
+ * @work: work to queue
+ *
+ * We queue the work to a "random" CPU within a given NUMA node. The basic
+ * idea here is to provide a way to somehow associate work with a given
+ * NUMA node.
+ *
+ * This function will only make a best effort attempt at getting this onto
+ * the right NUMA node. If no node is requested or the requested node is
+ * offline then we just fall back to standard queue_work behavior.
+ *
+ * Currently the "random" CPU ends up being the first available CPU in the
+ * intersection of cpu_online_mask and the cpumask of the node, unless we
+ * are running on the node. In that case we just use the current CPU.
+ *
+ * Return: %false if @work was already on a queue, %true otherwise.
+ */
+bool queue_work_node(int node, struct workqueue_struct *wq,
+ struct work_struct *work)
+{
+ unsigned long flags;
+ bool ret = false;
+
+ /*
+ * This current implementation is specific to unbound workqueues.
+ * Specifically we only return the first available CPU for a given
+ * node instead of cycling through individual CPUs within the node.
+ *
+ * If this is used with a per-cpu workqueue then the logic in
+ * workqueue_select_cpu_near would need to be updated to allow for
+ * some round robin type logic.
+ */
+ WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND));
+
+ local_irq_save(flags);
+
+ if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
+ int cpu = workqueue_select_cpu_near(node);
+
+ __queue_work(cpu, wq, work);
+ ret = true;
+ }
+
+ local_irq_restore(flags);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(queue_work_node);
+
void delayed_work_timer_fn(struct timer_list *t)
{
struct delayed_work *dwork = from_timer(dwork, t, timer);
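
As a usage illustration for the pair of helpers above, a hypothetical driver might queue NUMA-local work like this (a sketch only; numa_wq, numa_work and mydrv_kick() are invented names, and the workqueue must be unbound as the WARN_ON_ONCE() demands):

#include <linux/device.h>
#include <linux/workqueue.h>

/* Sketch, not part of this patch: prefer a CPU on the device's node. */
static struct workqueue_struct *numa_wq;	/* alloc_workqueue(..., WQ_UNBOUND, ...) */
static struct work_struct numa_work;

static void mydrv_kick(struct device *dev)
{
	/* Falls back to WORK_CPU_UNBOUND when the node is invalid/offline */
	if (!queue_work_node(dev_to_node(dev), numa_wq, &numa_work))
		pr_debug("numa work already pending\n");
}
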
@@ -1619,7 +1735,7 @@ static void rcu_work_rcufn(struct rcu_head *rcu)
*
* Return: %false if @rwork was already pending, %true otherwise. Note
* that a full RCU grace period is guaranteed only after a %true return.
- * While @rwork is guarnateed to be executed after a %false return, the
+ * While @rwork is guaranteed to be executed after a %false return, the
* execution may happen before a full RCU grace period has passed.
*/
bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork)
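
The grace-period guarantee described above is easiest to see with a usage sketch (the defer_free type and its functions are hypothetical, not part of this patch):

#include <linux/slab.h>
#include <linux/workqueue.h>

/* Sketch, not part of this patch: kfree() an object from workqueue
 * context, but only after an RCU grace period has elapsed.
 */
struct defer_free {
	struct rcu_work rwork;
	void *payload;
};

static void defer_free_workfn(struct work_struct *work)
{
	struct defer_free *d = container_of(to_rcu_work(work),
					    struct defer_free, rwork);

	kfree(d->payload);
	kfree(d);
}

static void defer_free_schedule(struct defer_free *d)
{
	INIT_RCU_WORK(&d->rwork, defer_free_workfn);
	queue_rcu_work(system_wq, &d->rwork);	/* %true: grace period guaranteed */
}
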
@@ -2184,6 +2300,9 @@ __acquires(&pool->lock)
if (unlikely(cpu_intensive))
worker_clr_flags(worker, WORKER_CPU_INTENSIVE);
+ /* tag the worker for identification in schedule() */
+ worker->last_func = worker->current_func;
+
/* we're done with it, release */
hash_del(&worker->hentry);
worker->current_work = NULL;
@@ -2908,6 +3027,9 @@ static bool __flush_work(struct work_struct *work, bool from_cancel)
if (WARN_ON(!wq_online))
return false;
+ if (WARN_ON(!work->func))
+ return false;
+
if (!from_cancel) {
lock_map_acquire(&work->lockdep_map);
lock_map_release(&work->lockdep_map);
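
The new WARN_ON(!work->func) check turns a silent misuse into a loud one: flushing a work item that was never initialized. A sketch of the bug class it catches (my_ctx and its work function are hypothetical):

#include <linux/slab.h>
#include <linux/workqueue.h>

struct my_ctx {
	struct work_struct work;
};

static void my_ctx_workfn(struct work_struct *work) { }

/* Sketch, not part of this patch: ctx comes straight from kzalloc(),
 * so work->func is NULL -- flush_work() now warns and returns false.
 */
static void my_ctx_broken(struct my_ctx *ctx)
{
	flush_work(&ctx->work);
}

/* Correct usage: initialize and queue before flushing. */
static void my_ctx_correct(struct my_ctx *ctx)
{
	INIT_WORK(&ctx->work, my_ctx_workfn);
	schedule_work(&ctx->work);
	flush_work(&ctx->work);
}
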
@@ -3314,11 +3436,49 @@ static int init_worker_pool(struct worker_pool *pool)
return 0;
}
+#ifdef CONFIG_LOCKDEP
+static void wq_init_lockdep(struct workqueue_struct *wq)
+{
+ char *lock_name;
+
+ lockdep_register_key(&wq->key);
+ lock_name = kasprintf(GFP_KERNEL, "%s%s", "(wq_completion)", wq->name);
+ if (!lock_name)
+ lock_name = wq->name;
+
+ wq->lock_name = lock_name;
+ lockdep_init_map(&wq->lockdep_map, lock_name, &wq->key, 0);
+}
+
+static void wq_unregister_lockdep(struct workqueue_struct *wq)
+{
+ lockdep_unregister_key(&wq->key);
+}
+
+static void wq_free_lockdep(struct workqueue_struct *wq)
+{
+ if (wq->lock_name != wq->name)
+ kfree(wq->lock_name);
+}
+#else
+static void wq_init_lockdep(struct workqueue_struct *wq)
+{
+}
+
+static void wq_unregister_lockdep(struct workqueue_struct *wq)
+{
+}
+
+static void wq_free_lockdep(struct workqueue_struct *wq)
+{
+}
+#endif
+
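
These helpers replace the old scheme of one static lock_class_key per alloc_workqueue() call site with a key registered at runtime for each workqueue instance. The same pattern in isolation looks roughly like this (struct foo is hypothetical; lockdep_register_key()/lockdep_unregister_key() are the interfaces used above):

#include <linux/lockdep.h>
#include <linux/slab.h>

/* Sketch, not part of this patch: per-instance lockdep class. */
struct foo {
	struct lock_class_key key;
	struct lockdep_map dep_map;
	char *lock_name;
	char name[16];
};

static void foo_init_lockdep(struct foo *f)
{
	lockdep_register_key(&f->key);
	f->lock_name = kasprintf(GFP_KERNEL, "(foo)%s", f->name);
	lockdep_init_map(&f->dep_map, f->lock_name ? : f->name, &f->key, 0);
}

static void foo_exit_lockdep(struct foo *f)
{
	lockdep_unregister_key(&f->key);	/* before &f->key memory is reused */
	kfree(f->lock_name);			/* kfree(NULL) is a no-op */
}
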
static void rcu_free_wq(struct rcu_head *rcu)
{
struct workqueue_struct *wq =
container_of(rcu, struct workqueue_struct, rcu);
+ wq_free_lockdep(wq);
+
if (!(wq->flags & WQ_UNBOUND))
free_percpu(wq->cpu_pwqs);
else
@@ -3509,8 +3669,10 @@ static void pwq_unbound_release_workfn(struct work_struct *work)
* If we're the last pwq going away, @wq is already dead and no one
* is gonna access it anymore. Schedule RCU free.
*/
- if (is_last)
+ if (is_last) {
+ wq_unregister_lockdep(wq);
call_rcu(&wq->rcu, rcu_free_wq);
+ }
}
/**
@@ -4044,11 +4206,9 @@ static int init_rescuer(struct workqueue_struct *wq)
return 0;
}
-struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
- unsigned int flags,
- int max_active,
- struct lock_class_key *key,
- const char *lock_name, ...)
+struct workqueue_struct *alloc_workqueue(const char *fmt,
+ unsigned int flags,
+ int max_active, ...)
{
size_t tbl_size = 0;
va_list args;
@@ -4083,7 +4243,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
goto err_free_wq;
}
- va_start(args, lock_name);
+ va_start(args, max_active);
vsnprintf(wq->name, sizeof(wq->name), fmt, args);
va_end(args);
@@ -4100,7 +4260,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
INIT_LIST_HEAD(&wq->flusher_overflow);
INIT_LIST_HEAD(&wq->maydays);
- lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
+ wq_init_lockdep(wq);
INIT_LIST_HEAD(&wq->list);
if (alloc_and_link_pwqs(wq) < 0)
@@ -4138,7 +4298,7 @@ err_destroy:
destroy_workqueue(wq);
return NULL;
}
-EXPORT_SYMBOL_GPL(__alloc_workqueue_key);
+EXPORT_SYMBOL_GPL(alloc_workqueue);
/**
* destroy_workqueue - safely terminate a workqueue
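
With the key registered inside alloc_workqueue() itself, callers no longer go through the old header macro that passed a static lock_class_key to __alloc_workqueue_key(); they call the directly exported function. A sketch of ordinary usage against the new signature (mydrv naming is hypothetical):

#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *mydrv_wq;

static int __init mydrv_init(void)
{
	mydrv_wq = alloc_workqueue("mydrv/%s", WQ_UNBOUND | WQ_MEM_RECLAIM,
				   0, "events");
	return mydrv_wq ? 0 : -ENOMEM;
}

static void __exit mydrv_exit(void)
{
	/* unregisters the per-wq lockdep key along with the workqueue */
	destroy_workqueue(mydrv_wq);
}
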
@@ -4191,6 +4351,7 @@ void destroy_workqueue(struct workqueue_struct *wq)
kthread_stop(wq->rescuer->task);
if (!(wq->flags & WQ_UNBOUND)) {
+ wq_unregister_lockdep(wq);
/*
* The base ref is never dropped on per-cpu pwqs. Directly
* schedule RCU free.