From 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 16 Apr 2005 15:20:36 -0700 Subject: Linux-2.6.12-rc2 Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip! --- include/linux/workqueue.h | 90 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 include/linux/workqueue.h (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h new file mode 100644 index 000000000000..ff46f537ba9b --- /dev/null +++ b/include/linux/workqueue.h @@ -0,0 +1,90 @@ +/* + * workqueue.h --- work queue handling for Linux. + */ + +#ifndef _LINUX_WORKQUEUE_H +#define _LINUX_WORKQUEUE_H + +#include +#include +#include + +struct workqueue_struct; + +struct work_struct { + unsigned long pending; + struct list_head entry; + void (*func)(void *); + void *data; + void *wq_data; + struct timer_list timer; +}; + +#define __WORK_INITIALIZER(n, f, d) { \ + .entry = { &(n).entry, &(n).entry }, \ + .func = (f), \ + .data = (d), \ + .timer = TIMER_INITIALIZER(NULL, 0, 0), \ + } + +#define DECLARE_WORK(n, f, d) \ + struct work_struct n = __WORK_INITIALIZER(n, f, d) + +/* + * initialize a work-struct's func and data pointers: + */ +#define PREPARE_WORK(_work, _func, _data) \ + do { \ + (_work)->func = _func; \ + (_work)->data = _data; \ + } while (0) + +/* + * initialize all of a work-struct: + */ +#define INIT_WORK(_work, _func, _data) \ + do { \ + INIT_LIST_HEAD(&(_work)->entry); \ + (_work)->pending = 0; \ + PREPARE_WORK((_work), (_func), (_data)); \ + init_timer(&(_work)->timer); \ + } while (0) + +extern struct workqueue_struct *__create_workqueue(const char *name, + int singlethread); +#define create_workqueue(name) __create_workqueue((name), 0) +#define create_singlethread_workqueue(name) __create_workqueue((name), 1) + +extern void destroy_workqueue(struct workqueue_struct *wq); + +extern int FASTCALL(queue_work(struct workqueue_struct *wq, struct work_struct *work)); +extern int FASTCALL(queue_delayed_work(struct workqueue_struct *wq, struct work_struct *work, unsigned long delay)); +extern void FASTCALL(flush_workqueue(struct workqueue_struct *wq)); + +extern int FASTCALL(schedule_work(struct work_struct *work)); +extern int FASTCALL(schedule_delayed_work(struct work_struct *work, unsigned long delay)); + +extern int schedule_delayed_work_on(int cpu, struct work_struct *work, unsigned long delay); +extern void flush_scheduled_work(void); +extern int current_is_keventd(void); +extern int keventd_up(void); + +extern void init_workqueues(void); +void cancel_rearming_delayed_work(struct work_struct *work); + +/* + * Kill off a pending schedule_delayed_work(). Note that the work callback + * function may still be running on return from cancel_delayed_work(). Run + * flush_scheduled_work() to wait on it. 
+ */ +static inline int cancel_delayed_work(struct work_struct *work) +{ + int ret; + + ret = del_timer_sync(&work->timer); + if (ret) + clear_bit(0, &work->pending); + return ret; +} + +#endif -- cgit v1.2.3 From 81ddef77bb774e771db8588b937665cd38f40cee Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Sat, 16 Apr 2005 15:23:59 -0700 Subject: [PATCH] re-export cancel_rearming_delayed_workqueue This was unexported by Arjan because we have no current users. However, during a conversion from tasklets to workqueues of the parisc led functions, we ran across a case where this was needed. In particular, the open coded equivalent of cancel_rearming_delayed_workqueue was implemented incorrectly, which is, I think, all the evidence necessary that this is a useful API. Signed-off-by: James Bottomley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/workqueue.h | 2 ++ kernel/workqueue.c | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index ff46f537ba9b..ac39d04d027c 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -71,6 +71,8 @@ extern int keventd_up(void); extern void init_workqueues(void); void cancel_rearming_delayed_work(struct work_struct *work); +void cancel_rearming_delayed_workqueue(struct workqueue_struct *, + struct work_struct *); /* * Kill off a pending schedule_delayed_work(). Note that the work callback diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 52ef419d2747..259cf55da3c9 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -429,12 +429,13 @@ void flush_scheduled_work(void) * @wq: the controlling workqueue structure * @work: the delayed work struct */ -static void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, - struct work_struct *work) +void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, + struct work_struct *work) { while (!cancel_delayed_work(work)) flush_workqueue(wq); } +EXPORT_SYMBOL(cancel_rearming_delayed_workqueue); /** * cancel_rearming_delayed_work - reliably kill off a delayed keventd -- cgit v1.2.3 From 15316ba81aee6775d6079fb46c66c801989e7d10 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 8 Jan 2006 01:00:43 -0800 Subject: [PATCH] add schedule_on_each_cpu() swap migration's isolate_lru_page() currently uses an IPI to notify other processors that the lru caches need to be drained if the page cannot be found on the LRU. The IPI interrupt may interrupt a processor that is just processing lru requests and cause a race condition. This patch introduces a new function run_on_each_cpu() that uses the keventd() to run the LRU draining on each processor. Processors disable preemption when dealing the LRU caches (these are per processor) and thus executing LRU draining from another process is safe. Thanks to Lee Schermerhorn for finding this race condition. 
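For illustration, a minimal caller of the new interface might look like the sketch below. The helper names are hypothetical and are not part of this patch; the sketch only shows the calling convention: the callback takes a void *info argument, runs in keventd (process) context on every online CPU, and schedule_on_each_cpu() returns 0 only after all of them have completed (or -ENOMEM if the per-CPU work items could not be allocated).

	/* Hypothetical example -- not part of this patch. */
	static void lru_add_drain_per_cpu(void *info)
	{
		lru_add_drain();	/* drain this CPU's LRU pagevecs */
	}

	static int drain_all_lru_caches(void)
	{
		/* run the callback on each online CPU via keventd and wait */
		return schedule_on_each_cpu(lru_add_drain_per_cpu, NULL);
	}
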
Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/workqueue.h | 1 + kernel/workqueue.c | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index ac39d04d027c..86b111300231 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -65,6 +65,7 @@ extern int FASTCALL(schedule_work(struct work_struct *work)); extern int FASTCALL(schedule_delayed_work(struct work_struct *work, unsigned long delay)); extern int schedule_delayed_work_on(int cpu, struct work_struct *work, unsigned long delay); +extern int schedule_on_each_cpu(void (*func)(void *info), void *info); extern void flush_scheduled_work(void); extern int current_is_keventd(void); extern int keventd_up(void); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 2bd5aee1c736..62d47220696a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -419,6 +419,25 @@ int schedule_delayed_work_on(int cpu, return ret; } +int schedule_on_each_cpu(void (*func) (void *info), void *info) +{ + int cpu; + struct work_struct *work; + + work = kmalloc(NR_CPUS * sizeof(struct work_struct), GFP_KERNEL); + + if (!work) + return -ENOMEM; + for_each_online_cpu(cpu) { + INIT_WORK(work + cpu, func, info); + __queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu), + work + cpu); + } + flush_workqueue(keventd_wq); + kfree(work); + return 0; +} + void flush_scheduled_work(void) { flush_workqueue(keventd_wq); -- cgit v1.2.3 From 1fa44ecad2b86475e038aed81b0bf333fa484f8b Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Thu, 23 Feb 2006 12:43:43 -0600 Subject: [SCSI] add execute_in_process_context() API We have several points in the SCSI stack (primarily for our device functions) where we need to guarantee process context, but (given the place where the last reference was released) we cannot guarantee this. This API gets around the issue by executing the function directly if the caller has process context, but scheduling a workqueue to execute in process context if the caller doesn't have it. Signed-off-by: James Bottomley --- include/linux/workqueue.h | 6 ++++++ kernel/workqueue.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 86b111300231..957c21c16d62 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -20,6 +20,10 @@ struct work_struct { struct timer_list timer; }; +struct execute_work { + struct work_struct work; +}; + #define __WORK_INITIALIZER(n, f, d) { \ .entry = { &(n).entry, &(n).entry }, \ .func = (f), \ @@ -74,6 +78,8 @@ extern void init_workqueues(void); void cancel_rearming_delayed_work(struct work_struct *work); void cancel_rearming_delayed_workqueue(struct workqueue_struct *, struct work_struct *); +int execute_in_process_context(void (*fn)(void *), void *, + struct execute_work *); /* * Kill off a pending schedule_delayed_work(). 
Note that the work callback diff --git a/kernel/workqueue.c b/kernel/workqueue.c index b052e2c4c710..e9e464a90376 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -27,6 +27,7 @@ #include #include #include +#include /* * The per-CPU workqueue (if single thread, we always use the first @@ -476,6 +477,34 @@ void cancel_rearming_delayed_work(struct work_struct *work) } EXPORT_SYMBOL(cancel_rearming_delayed_work); +/** + * execute_in_process_context - reliably execute the routine with user context + * @fn: the function to execute + * @data: data to pass to the function + * @ew: guaranteed storage for the execute work structure (must + * be available when the work executes) + * + * Executes the function immediately if process context is available, + * otherwise schedules the function for delayed execution. + * + * Returns: 0 - function was executed + * 1 - function was scheduled for execution + */ +int execute_in_process_context(void (*fn)(void *data), void *data, + struct execute_work *ew) +{ + if (!in_interrupt()) { + fn(data); + return 0; + } + + INIT_WORK(&ew->work, fn, data); + schedule_work(&ew->work); + + return 1; +} +EXPORT_SYMBOL_GPL(execute_in_process_context); + int keventd_up(void) { return keventd_wq != NULL; -- cgit v1.2.3 From 7a6bc1cdd506cf81f856f0fef4e56a2ba0c5a26d Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Wed, 28 Jun 2006 13:50:33 -0700 Subject: [CPUFREQ] Add queue_delayed_work_on() interface for workqueues. Add queue_delayed_work_on() interface for workqueues. Signed-off-by: Alexey Starikovskiy Signed-off-by: Venkatesh Pallipadi Signed-off-by: Dave Jones --- include/linux/workqueue.h | 2 ++ kernel/workqueue.c | 38 +++++++++++++++++++++++--------------- 2 files changed, 25 insertions(+), 15 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 957c21c16d62..9bca3539a1e5 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -63,6 +63,8 @@ extern void destroy_workqueue(struct workqueue_struct *wq); extern int FASTCALL(queue_work(struct workqueue_struct *wq, struct work_struct *work)); extern int FASTCALL(queue_delayed_work(struct workqueue_struct *wq, struct work_struct *work, unsigned long delay)); +extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, + struct work_struct *work, unsigned long delay); extern void FASTCALL(flush_workqueue(struct workqueue_struct *wq)); extern int FASTCALL(schedule_work(struct work_struct *work)); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 59f0b42bd89e..8fbef7008a7e 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -148,6 +148,27 @@ int fastcall queue_delayed_work(struct workqueue_struct *wq, return ret; } +int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, + struct work_struct *work, unsigned long delay) +{ + int ret = 0; + struct timer_list *timer = &work->timer; + + if (!test_and_set_bit(0, &work->pending)) { + BUG_ON(timer_pending(timer)); + BUG_ON(!list_empty(&work->entry)); + + /* This stores wq for the moment, for the timer_fn */ + work->wq_data = wq; + timer->expires = jiffies + delay; + timer->data = (unsigned long)work; + timer->function = delayed_work_timer_fn; + add_timer_on(timer, cpu); + ret = 1; + } + return ret; +} + static void run_workqueue(struct cpu_workqueue_struct *cwq) { unsigned long flags; @@ -411,21 +432,7 @@ int fastcall schedule_delayed_work(struct work_struct *work, unsigned long delay int schedule_delayed_work_on(int cpu, struct work_struct 
*work, unsigned long delay) { - int ret = 0; - struct timer_list *timer = &work->timer; - - if (!test_and_set_bit(0, &work->pending)) { - BUG_ON(timer_pending(timer)); - BUG_ON(!list_empty(&work->entry)); - /* This stores keventd_wq for the moment, for the timer_fn */ - work->wq_data = keventd_wq; - timer->expires = jiffies + delay; - timer->data = (unsigned long)work; - timer->function = delayed_work_timer_fn; - add_timer_on(timer, cpu); - ret = 1; - } - return ret; + return queue_delayed_work_on(cpu, keventd_wq, work, delay); } /** @@ -622,6 +629,7 @@ void init_workqueues(void) EXPORT_SYMBOL_GPL(__create_workqueue); EXPORT_SYMBOL_GPL(queue_work); EXPORT_SYMBOL_GPL(queue_delayed_work); +EXPORT_SYMBOL_GPL(queue_delayed_work_on); EXPORT_SYMBOL_GPL(flush_workqueue); EXPORT_SYMBOL_GPL(destroy_workqueue); -- cgit v1.2.3 From 52bad64d95bd89e08c49ec5a071fa6dcbe5a1a9c Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 22 Nov 2006 14:54:01 +0000 Subject: WorkStruct: Separate delayable and non-delayable events. Separate delayable work items from non-delayable work items be splitting them into a separate structure (delayed_work), which incorporates a work_struct and the timer_list removed from work_struct. The work_struct struct is huge, and this limits it's usefulness. On a 64-bit architecture it's nearly 100 bytes in size. This reduces that by half for the non-delayable type of event. Signed-Off-By: David Howells --- arch/x86_64/kernel/mce.c | 2 +- drivers/ata/libata-core.c | 11 +++----- drivers/ata/libata-eh.c | 2 +- drivers/char/random.c | 2 +- drivers/char/tty_io.c | 2 +- fs/aio.c | 4 +-- fs/nfs/client.c | 2 +- fs/nfs/namespace.c | 3 ++- include/linux/aio.h | 2 +- include/linux/kbd_kern.h | 2 +- include/linux/libata.h | 4 +-- include/linux/nfs_fs_sb.h | 2 +- include/linux/sunrpc/rpc_pipe_fs.h | 2 +- include/linux/sunrpc/xprt.h | 2 +- include/linux/tty.h | 2 +- include/linux/workqueue.h | 44 +++++++++++++++++++++++--------- kernel/workqueue.c | 51 +++++++++++++++++++++----------------- mm/slab.c | 8 +++--- net/core/link_watch.c | 9 +++---- net/sunrpc/cache.c | 4 +-- net/sunrpc/rpc_pipe.c | 3 ++- net/sunrpc/xprtsock.c | 6 ++--- 22 files changed, 96 insertions(+), 73 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index bbea88801d88..5306f2630905 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c @@ -307,7 +307,7 @@ void mce_log_therm_throt_event(unsigned int cpu, __u64 status) static int check_interval = 5 * 60; /* 5 minutes */ static void mcheck_timer(void *data); -static DECLARE_WORK(mcheck_work, mcheck_timer, NULL); +static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer, NULL); static void mcheck_check_cpu(void *info) { diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 915a55a6cc14..0bb4b4dced76 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -937,12 +937,9 @@ void ata_port_queue_task(struct ata_port *ap, void (*fn)(void *), void *data, if (ap->pflags & ATA_PFLAG_FLUSH_PORT_TASK) return; - PREPARE_WORK(&ap->port_task, fn, data); + PREPARE_DELAYED_WORK(&ap->port_task, fn, data); - if (!delay) - rc = queue_work(ata_wq, &ap->port_task); - else - rc = queue_delayed_work(ata_wq, &ap->port_task, delay); + rc = queue_delayed_work(ata_wq, &ap->port_task, delay); /* rc == 0 means that another user is using port task */ WARN_ON(rc == 0); @@ -5320,8 +5317,8 @@ void ata_port_init(struct ata_port *ap, struct ata_host *host, ap->msg_enable = ATA_MSG_DRV | ATA_MSG_ERR 
| ATA_MSG_WARN; #endif - INIT_WORK(&ap->port_task, NULL, NULL); - INIT_WORK(&ap->hotplug_task, ata_scsi_hotplug, ap); + INIT_DELAYED_WORK(&ap->port_task, NULL, NULL); + INIT_DELAYED_WORK(&ap->hotplug_task, ata_scsi_hotplug, ap); INIT_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan, ap); INIT_LIST_HEAD(&ap->eh_done_q); init_waitqueue_head(&ap->eh_wait_q); diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 02b2b2787d9b..9f6b7cc74fd9 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -332,7 +332,7 @@ void ata_scsi_error(struct Scsi_Host *host) if (ap->pflags & ATA_PFLAG_LOADING) ap->pflags &= ~ATA_PFLAG_LOADING; else if (ap->pflags & ATA_PFLAG_SCSI_HOTPLUG) - queue_work(ata_aux_wq, &ap->hotplug_task); + queue_delayed_work(ata_aux_wq, &ap->hotplug_task, 0); if (ap->pflags & ATA_PFLAG_RECOVERED) ata_port_printk(ap, KERN_INFO, "EH complete\n"); diff --git a/drivers/char/random.c b/drivers/char/random.c index eb6b13f4211a..f2ab61f3e8ae 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1424,7 +1424,7 @@ static unsigned int ip_cnt; static void rekey_seq_generator(void *private_); -static DECLARE_WORK(rekey_work, rekey_seq_generator, NULL); +static DECLARE_DELAYED_WORK(rekey_work, rekey_seq_generator, NULL); /* * Lock avoidance: diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index e90ea39c7c4b..7297acfe520c 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -3580,7 +3580,7 @@ static void initialize_tty_struct(struct tty_struct *tty) tty->overrun_time = jiffies; tty->buf.head = tty->buf.tail = NULL; tty_buffer_init(tty); - INIT_WORK(&tty->buf.work, flush_to_ldisc, tty); + INIT_DELAYED_WORK(&tty->buf.work, flush_to_ldisc, tty); init_MUTEX(&tty->buf.pty_sem); mutex_init(&tty->termios_mutex); init_waitqueue_head(&tty->write_wait); diff --git a/fs/aio.c b/fs/aio.c index 94766599db00..11a1a7100ad6 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -227,7 +227,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) INIT_LIST_HEAD(&ctx->active_reqs); INIT_LIST_HEAD(&ctx->run_list); - INIT_WORK(&ctx->wq, aio_kick_handler, ctx); + INIT_DELAYED_WORK(&ctx->wq, aio_kick_handler, ctx); if (aio_setup_ring(ctx) < 0) goto out_freectx; @@ -876,7 +876,7 @@ static void aio_kick_handler(void *data) * we're in a worker thread already, don't use queue_delayed_work, */ if (requeue) - queue_work(aio_wq, &ctx->wq); + queue_delayed_work(aio_wq, &ctx->wq, 0); } diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 5fea638743e4..6f0487d6f44a 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -143,7 +143,7 @@ static struct nfs_client *nfs_alloc_client(const char *hostname, INIT_LIST_HEAD(&clp->cl_state_owners); INIT_LIST_HEAD(&clp->cl_unused); spin_lock_init(&clp->cl_lock); - INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp); + INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state, clp); rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); clp->cl_boot_time = CURRENT_TIME; clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index ec1114b33d89..5ed798bc1cf7 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -21,7 +21,8 @@ static void nfs_expire_automounts(void *list); LIST_HEAD(nfs_automount_list); -static DECLARE_WORK(nfs_automount_task, nfs_expire_automounts, &nfs_automount_list); +static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts, + &nfs_automount_list); int nfs_mountpoint_expiry_timeout = 500 * HZ; static struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent, diff 
--git a/include/linux/aio.h b/include/linux/aio.h index 0d71c0041f13..9e350fd44d77 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -194,7 +194,7 @@ struct kioctx { struct aio_ring_info ring_info; - struct work_struct wq; + struct delayed_work wq; }; /* prototypes */ diff --git a/include/linux/kbd_kern.h b/include/linux/kbd_kern.h index efe0ee4cc80b..06c58c423fe1 100644 --- a/include/linux/kbd_kern.h +++ b/include/linux/kbd_kern.h @@ -158,7 +158,7 @@ static inline void con_schedule_flip(struct tty_struct *t) if (t->buf.tail != NULL) t->buf.tail->commit = t->buf.tail->used; spin_unlock_irqrestore(&t->buf.lock, flags); - schedule_work(&t->buf.work); + schedule_delayed_work(&t->buf.work, 0); } #endif diff --git a/include/linux/libata.h b/include/linux/libata.h index abd2debebca2..5f04006e8dd2 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -568,8 +568,8 @@ struct ata_port { struct ata_host *host; struct device *dev; - struct work_struct port_task; - struct work_struct hotplug_task; + struct delayed_work port_task; + struct delayed_work hotplug_task; struct work_struct scsi_rescan_task; unsigned int hsm_task_state; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 7ccfc7ef0a83..95796e6924f1 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -51,7 +51,7 @@ struct nfs_client { unsigned long cl_lease_time; unsigned long cl_last_renewal; - struct work_struct cl_renewd; + struct delayed_work cl_renewd; struct rpc_wait_queue cl_rpcwaitq; diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index a2eb9b4a9de3..4a68125b6de6 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -30,7 +30,7 @@ struct rpc_inode { #define RPC_PIPE_WAIT_FOR_OPEN 1 int flags; struct rpc_pipe_ops *ops; - struct work_struct queue_timeout; + struct delayed_work queue_timeout; }; static inline struct rpc_inode * diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 60394fbc4c70..3e04c1512fc4 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -177,7 +177,7 @@ struct rpc_xprt { unsigned long connect_timeout, bind_timeout, reestablish_timeout; - struct work_struct connect_worker; + struct delayed_work connect_worker; unsigned short port; /* diff --git a/include/linux/tty.h b/include/linux/tty.h index 44091c0db0b4..c1f716446161 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -53,7 +53,7 @@ struct tty_buffer { }; struct tty_bufhead { - struct work_struct work; + struct delayed_work work; struct semaphore pty_sem; spinlock_t lock; struct tty_buffer *head; /* Queue head */ diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 9bca3539a1e5..9faaccae570e 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -17,6 +17,10 @@ struct work_struct { void (*func)(void *); void *data; void *wq_data; +}; + +struct delayed_work { + struct work_struct work; struct timer_list timer; }; @@ -28,32 +32,48 @@ struct execute_work { .entry = { &(n).entry, &(n).entry }, \ .func = (f), \ .data = (d), \ + } + +#define __DELAYED_WORK_INITIALIZER(n, f, d) { \ + .work = __WORK_INITIALIZER((n).work, (f), (d)), \ .timer = TIMER_INITIALIZER(NULL, 0, 0), \ } #define DECLARE_WORK(n, f, d) \ struct work_struct n = __WORK_INITIALIZER(n, f, d) +#define DECLARE_DELAYED_WORK(n, f, d) \ + struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f, d) + /* - * initialize a work-struct's func and data pointers: + * initialize a 
work item's function and data pointers */ #define PREPARE_WORK(_work, _func, _data) \ do { \ - (_work)->func = _func; \ - (_work)->data = _data; \ + (_work)->func = (_func); \ + (_work)->data = (_data); \ } while (0) +#define PREPARE_DELAYED_WORK(_work, _func, _data) \ + PREPARE_WORK(&(_work)->work, (_func), (_data)) + /* - * initialize all of a work-struct: + * initialize all of a work item in one go */ #define INIT_WORK(_work, _func, _data) \ do { \ INIT_LIST_HEAD(&(_work)->entry); \ (_work)->pending = 0; \ PREPARE_WORK((_work), (_func), (_data)); \ + } while (0) + +#define INIT_DELAYED_WORK(_work, _func, _data) \ + do { \ + INIT_WORK(&(_work)->work, (_func), (_data)); \ init_timer(&(_work)->timer); \ } while (0) + extern struct workqueue_struct *__create_workqueue(const char *name, int singlethread); #define create_workqueue(name) __create_workqueue((name), 0) @@ -62,24 +82,24 @@ extern struct workqueue_struct *__create_workqueue(const char *name, extern void destroy_workqueue(struct workqueue_struct *wq); extern int FASTCALL(queue_work(struct workqueue_struct *wq, struct work_struct *work)); -extern int FASTCALL(queue_delayed_work(struct workqueue_struct *wq, struct work_struct *work, unsigned long delay)); +extern int FASTCALL(queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *work, unsigned long delay)); extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, - struct work_struct *work, unsigned long delay); + struct delayed_work *work, unsigned long delay); extern void FASTCALL(flush_workqueue(struct workqueue_struct *wq)); extern int FASTCALL(schedule_work(struct work_struct *work)); -extern int FASTCALL(schedule_delayed_work(struct work_struct *work, unsigned long delay)); +extern int FASTCALL(schedule_delayed_work(struct delayed_work *work, unsigned long delay)); -extern int schedule_delayed_work_on(int cpu, struct work_struct *work, unsigned long delay); +extern int schedule_delayed_work_on(int cpu, struct delayed_work *work, unsigned long delay); extern int schedule_on_each_cpu(void (*func)(void *info), void *info); extern void flush_scheduled_work(void); extern int current_is_keventd(void); extern int keventd_up(void); extern void init_workqueues(void); -void cancel_rearming_delayed_work(struct work_struct *work); +void cancel_rearming_delayed_work(struct delayed_work *work); void cancel_rearming_delayed_workqueue(struct workqueue_struct *, - struct work_struct *); + struct delayed_work *); int execute_in_process_context(void (*fn)(void *), void *, struct execute_work *); @@ -88,13 +108,13 @@ int execute_in_process_context(void (*fn)(void *), void *, * function may still be running on return from cancel_delayed_work(). Run * flush_scheduled_work() to wait on it. 
*/ -static inline int cancel_delayed_work(struct work_struct *work) +static inline int cancel_delayed_work(struct delayed_work *work) { int ret; ret = del_timer_sync(&work->timer); if (ret) - clear_bit(0, &work->pending); + clear_bit(0, &work->work.pending); return ret; } diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 17c2f03d2c27..44fc54b7decf 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -122,29 +122,33 @@ EXPORT_SYMBOL_GPL(queue_work); static void delayed_work_timer_fn(unsigned long __data) { - struct work_struct *work = (struct work_struct *)__data; - struct workqueue_struct *wq = work->wq_data; + struct delayed_work *dwork = (struct delayed_work *)__data; + struct workqueue_struct *wq = dwork->work.wq_data; int cpu = smp_processor_id(); if (unlikely(is_single_threaded(wq))) cpu = singlethread_cpu; - __queue_work(per_cpu_ptr(wq->cpu_wq, cpu), work); + __queue_work(per_cpu_ptr(wq->cpu_wq, cpu), &dwork->work); } /** * queue_delayed_work - queue work on a workqueue after delay * @wq: workqueue to use - * @work: work to queue + * @work: delayable work to queue * @delay: number of jiffies to wait before queueing * * Returns 0 if @work was already on a queue, non-zero otherwise. */ int fastcall queue_delayed_work(struct workqueue_struct *wq, - struct work_struct *work, unsigned long delay) + struct delayed_work *dwork, unsigned long delay) { int ret = 0; - struct timer_list *timer = &work->timer; + struct timer_list *timer = &dwork->timer; + struct work_struct *work = &dwork->work; + + if (delay == 0) + return queue_work(wq, work); if (!test_and_set_bit(0, &work->pending)) { BUG_ON(timer_pending(timer)); @@ -153,7 +157,7 @@ int fastcall queue_delayed_work(struct workqueue_struct *wq, /* This stores wq for the moment, for the timer_fn */ work->wq_data = wq; timer->expires = jiffies + delay; - timer->data = (unsigned long)work; + timer->data = (unsigned long)dwork; timer->function = delayed_work_timer_fn; add_timer(timer); ret = 1; @@ -172,10 +176,11 @@ EXPORT_SYMBOL_GPL(queue_delayed_work); * Returns 0 if @work was already on a queue, non-zero otherwise. */ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, - struct work_struct *work, unsigned long delay) + struct delayed_work *dwork, unsigned long delay) { int ret = 0; - struct timer_list *timer = &work->timer; + struct timer_list *timer = &dwork->timer; + struct work_struct *work = &dwork->work; if (!test_and_set_bit(0, &work->pending)) { BUG_ON(timer_pending(timer)); @@ -184,7 +189,7 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, /* This stores wq for the moment, for the timer_fn */ work->wq_data = wq; timer->expires = jiffies + delay; - timer->data = (unsigned long)work; + timer->data = (unsigned long)dwork; timer->function = delayed_work_timer_fn; add_timer_on(timer, cpu); ret = 1; @@ -468,31 +473,31 @@ EXPORT_SYMBOL(schedule_work); /** * schedule_delayed_work - put work task in global workqueue after delay - * @work: job to be done - * @delay: number of jiffies to wait + * @dwork: job to be done + * @delay: number of jiffies to wait or 0 for immediate execution * * After waiting for a given time this puts a job in the kernel-global * workqueue. 
*/ -int fastcall schedule_delayed_work(struct work_struct *work, unsigned long delay) +int fastcall schedule_delayed_work(struct delayed_work *dwork, unsigned long delay) { - return queue_delayed_work(keventd_wq, work, delay); + return queue_delayed_work(keventd_wq, dwork, delay); } EXPORT_SYMBOL(schedule_delayed_work); /** * schedule_delayed_work_on - queue work in global workqueue on CPU after delay * @cpu: cpu to use - * @work: job to be done + * @dwork: job to be done * @delay: number of jiffies to wait * * After waiting for a given time this puts a job in the kernel-global * workqueue on the specified CPU. */ int schedule_delayed_work_on(int cpu, - struct work_struct *work, unsigned long delay) + struct delayed_work *dwork, unsigned long delay) { - return queue_delayed_work_on(cpu, keventd_wq, work, delay); + return queue_delayed_work_on(cpu, keventd_wq, dwork, delay); } EXPORT_SYMBOL(schedule_delayed_work_on); @@ -539,12 +544,12 @@ EXPORT_SYMBOL(flush_scheduled_work); * cancel_rearming_delayed_workqueue - reliably kill off a delayed * work whose handler rearms the delayed work. * @wq: the controlling workqueue structure - * @work: the delayed work struct + * @dwork: the delayed work struct */ void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, - struct work_struct *work) + struct delayed_work *dwork) { - while (!cancel_delayed_work(work)) + while (!cancel_delayed_work(dwork)) flush_workqueue(wq); } EXPORT_SYMBOL(cancel_rearming_delayed_workqueue); @@ -552,11 +557,11 @@ EXPORT_SYMBOL(cancel_rearming_delayed_workqueue); /** * cancel_rearming_delayed_work - reliably kill off a delayed keventd * work whose handler rearms the delayed work. - * @work: the delayed work struct + * @dwork: the delayed work struct */ -void cancel_rearming_delayed_work(struct work_struct *work) +void cancel_rearming_delayed_work(struct delayed_work *dwork) { - cancel_rearming_delayed_workqueue(keventd_wq, work); + cancel_rearming_delayed_workqueue(keventd_wq, dwork); } EXPORT_SYMBOL(cancel_rearming_delayed_work); diff --git a/mm/slab.c b/mm/slab.c index 3c4a7e34eddc..a65bc5e992c3 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -753,7 +753,7 @@ int slab_is_available(void) return g_cpucache_up == FULL; } -static DEFINE_PER_CPU(struct work_struct, reap_work); +static DEFINE_PER_CPU(struct delayed_work, reap_work); static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) { @@ -916,16 +916,16 @@ static void next_reap_node(void) */ static void __devinit start_cpu_timer(int cpu) { - struct work_struct *reap_work = &per_cpu(reap_work, cpu); + struct delayed_work *reap_work = &per_cpu(reap_work, cpu); /* * When this gets called from do_initcalls via cpucache_init(), * init_workqueues() has already run, so keventd will be setup * at that time. 
*/ - if (keventd_up() && reap_work->func == NULL) { + if (keventd_up() && reap_work->work.func == NULL) { init_reap_node(cpu); - INIT_WORK(reap_work, cache_reap, NULL); + INIT_DELAYED_WORK(reap_work, cache_reap, NULL); schedule_delayed_work_on(cpu, reap_work, HZ + 3 * cpu); } } diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 4b36114744c5..f2ed09e25dfd 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -35,7 +35,7 @@ static unsigned long linkwatch_flags; static unsigned long linkwatch_nextevent; static void linkwatch_event(void *dummy); -static DECLARE_WORK(linkwatch_work, linkwatch_event, NULL); +static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event, NULL); static LIST_HEAD(lweventlist); static DEFINE_SPINLOCK(lweventlist_lock); @@ -171,10 +171,9 @@ void linkwatch_fire_event(struct net_device *dev) unsigned long delay = linkwatch_nextevent - jiffies; /* If we wrap around we'll delay it by at most HZ. */ - if (!delay || delay > HZ) - schedule_work(&linkwatch_work); - else - schedule_delayed_work(&linkwatch_work, delay); + if (delay > HZ) + delay = 0; + schedule_delayed_work(&linkwatch_work, delay); } } } diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 00cb388ece03..d5725cb1491e 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -285,7 +285,7 @@ static struct file_operations content_file_operations; static struct file_operations cache_flush_operations; static void do_cache_clean(void *data); -static DECLARE_WORK(cache_cleaner, do_cache_clean, NULL); +static DECLARE_DELAYED_WORK(cache_cleaner, do_cache_clean, NULL); void cache_register(struct cache_detail *cd) { @@ -337,7 +337,7 @@ void cache_register(struct cache_detail *cd) spin_unlock(&cache_list_lock); /* start the cleaning process */ - schedule_work(&cache_cleaner); + schedule_delayed_work(&cache_cleaner, 0); } int cache_unregister(struct cache_detail *cd) diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 9a0b41a97f90..97be3f7fed44 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -837,7 +837,8 @@ init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) INIT_LIST_HEAD(&rpci->pipe); rpci->pipelen = 0; init_waitqueue_head(&rpci->waitq); - INIT_WORK(&rpci->queue_timeout, rpc_timeout_upcall_queue, rpci); + INIT_DELAYED_WORK(&rpci->queue_timeout, + rpc_timeout_upcall_queue, rpci); rpci->ops = NULL; } } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 757fc91ef25d..3c7532cd009e 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1262,7 +1262,7 @@ static void xs_connect(struct rpc_task *task) xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO; } else { dprintk("RPC: xs_connect scheduled xprt %p\n", xprt); - schedule_work(&xprt->connect_worker); + schedule_delayed_work(&xprt->connect_worker, 0); /* flush_scheduled_work can sleep... */ if (!RPC_IS_ASYNC(task)) @@ -1375,7 +1375,7 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) /* XXX: header size can vary due to auth type, IPv6, etc. 
*/ xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); - INIT_WORK(&xprt->connect_worker, xs_udp_connect_worker, xprt); + INIT_DELAYED_WORK(&xprt->connect_worker, xs_udp_connect_worker, xprt); xprt->bind_timeout = XS_BIND_TO; xprt->connect_timeout = XS_UDP_CONN_TO; xprt->reestablish_timeout = XS_UDP_REEST_TO; @@ -1420,7 +1420,7 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; - INIT_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt); + INIT_DELAYED_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt); xprt->bind_timeout = XS_BIND_TO; xprt->connect_timeout = XS_TCP_CONN_TO; xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; -- cgit v1.2.3 From 6bb49e5965c1fc399b4d3cd2b5cf2da535b330c0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 22 Nov 2006 14:54:45 +0000 Subject: WorkStruct: Typedef the work function prototype Define a type for the work function prototype. It's not only kept in the work_struct struct, it's also passed as an argument to several functions. This makes it easier to change it. Signed-Off-By: David Howells --- drivers/block/floppy.c | 4 ++-- include/linux/workqueue.h | 9 +++++---- kernel/workqueue.c | 6 +++--- 3 files changed, 10 insertions(+), 9 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 9e6d3a87cbe3..5a14fac13b12 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -996,7 +996,7 @@ static DECLARE_WORK(floppy_work, NULL, NULL); static void schedule_bh(void (*handler) (void)) { - PREPARE_WORK(&floppy_work, (void (*)(void *))handler, NULL); + PREPARE_WORK(&floppy_work, (work_func_t)handler, NULL); schedule_work(&floppy_work); } @@ -1008,7 +1008,7 @@ static void cancel_activity(void) spin_lock_irqsave(&floppy_lock, flags); do_floppy = NULL; - PREPARE_WORK(&floppy_work, (void *)empty, NULL); + PREPARE_WORK(&floppy_work, (work_func_t)empty, NULL); del_timer(&fd_timer); spin_unlock_irqrestore(&floppy_lock, flags); } diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 9faaccae570e..cef40b22ff9a 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -11,10 +11,12 @@ struct workqueue_struct; +typedef void (*work_func_t)(void *data); + struct work_struct { unsigned long pending; struct list_head entry; - void (*func)(void *); + work_func_t func; void *data; void *wq_data; }; @@ -91,7 +93,7 @@ extern int FASTCALL(schedule_work(struct work_struct *work)); extern int FASTCALL(schedule_delayed_work(struct delayed_work *work, unsigned long delay)); extern int schedule_delayed_work_on(int cpu, struct delayed_work *work, unsigned long delay); -extern int schedule_on_each_cpu(void (*func)(void *info), void *info); +extern int schedule_on_each_cpu(work_func_t func, void *info); extern void flush_scheduled_work(void); extern int current_is_keventd(void); extern int keventd_up(void); @@ -100,8 +102,7 @@ extern void init_workqueues(void); void cancel_rearming_delayed_work(struct delayed_work *work); void cancel_rearming_delayed_workqueue(struct workqueue_struct *, struct delayed_work *); -int execute_in_process_context(void (*fn)(void *), void *, - struct execute_work *); +int execute_in_process_context(work_func_t fn, void *, struct execute_work *); /* * Kill off a pending schedule_delayed_work(). 
Note that the work callback diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 44fc54b7decf..1e9d61ecf762 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -217,7 +217,7 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq) while (!list_empty(&cwq->worklist)) { struct work_struct *work = list_entry(cwq->worklist.next, struct work_struct, entry); - void (*f) (void *) = work->func; + work_func_t f = work->func; void *data = work->data; list_del_init(cwq->worklist.next); @@ -513,7 +513,7 @@ EXPORT_SYMBOL(schedule_delayed_work_on); * * schedule_on_each_cpu() is very slow. */ -int schedule_on_each_cpu(void (*func)(void *info), void *info) +int schedule_on_each_cpu(work_func_t func, void *info) { int cpu; struct work_struct *works; @@ -578,7 +578,7 @@ EXPORT_SYMBOL(cancel_rearming_delayed_work); * Returns: 0 - function was executed * 1 - function was scheduled for execution */ -int execute_in_process_context(void (*fn)(void *data), void *data, +int execute_in_process_context(work_func_t fn, void *data, struct execute_work *ew) { if (!in_interrupt()) { -- cgit v1.2.3 From 365970a1ea76d81cb1ad2f652acb605f06dae256 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 22 Nov 2006 14:54:49 +0000 Subject: WorkStruct: Merge the pending bit into the wq_data pointer Reclaim a word from the size of the work_struct by folding the pending bit and the wq_data pointer together. This shouldn't cause misalignment problems as all pointers should be at least 4-byte aligned. Signed-Off-By: David Howells --- drivers/block/floppy.c | 4 ++-- include/linux/workqueue.h | 27 +++++++++++++++++++++++---- kernel/workqueue.c | 41 ++++++++++++++++++++++++++++++++--------- 3 files changed, 57 insertions(+), 15 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 5a14fac13b12..aa1eb4466f9d 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -1868,7 +1868,7 @@ static void show_floppy(void) printk("fdc_busy=%lu\n", fdc_busy); if (do_floppy) printk("do_floppy=%p\n", do_floppy); - if (floppy_work.pending) + if (work_pending(&floppy_work)) printk("floppy_work.func=%p\n", floppy_work.func); if (timer_pending(&fd_timer)) printk("fd_timer.function=%p\n", fd_timer.function); @@ -4498,7 +4498,7 @@ static void floppy_release_irq_and_dma(void) printk("floppy timer still active:%s\n", timeout_message); if (timer_pending(&fd_timer)) printk("auxiliary floppy timer still active\n"); - if (floppy_work.pending) + if (work_pending(&floppy_work)) printk("work still pending\n"); #endif old_fdc = fdc; diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index cef40b22ff9a..ecc017d24cf3 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -14,11 +14,15 @@ struct workqueue_struct; typedef void (*work_func_t)(void *data); struct work_struct { - unsigned long pending; + /* the first word is the work queue pointer and the pending flag + * rolled into one */ + unsigned long management; +#define WORK_STRUCT_PENDING 0 /* T if work item pending execution */ +#define WORK_STRUCT_FLAG_MASK (3UL) +#define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK) struct list_head entry; work_func_t func; void *data; - void *wq_data; }; struct delayed_work { @@ -65,7 +69,7 @@ struct execute_work { #define INIT_WORK(_work, _func, _data) \ do { \ INIT_LIST_HEAD(&(_work)->entry); \ - (_work)->pending = 0; \ + (_work)->management = 0; \ PREPARE_WORK((_work), (_func), (_data)); \ } while (0) @@ -75,6 +79,21 @@ struct execute_work 
{ init_timer(&(_work)->timer); \ } while (0) +/** + * work_pending - Find out whether a work item is currently pending + * @work: The work item in question + */ +#define work_pending(work) \ + test_bit(WORK_STRUCT_PENDING, &(work)->management) + +/** + * delayed_work_pending - Find out whether a delayable work item is currently + * pending + * @work: The work item in question + */ +#define delayed_work_pending(work) \ + test_bit(WORK_STRUCT_PENDING, &(work)->work.management) + extern struct workqueue_struct *__create_workqueue(const char *name, int singlethread); @@ -115,7 +134,7 @@ static inline int cancel_delayed_work(struct delayed_work *work) ret = del_timer_sync(&work->timer); if (ret) - clear_bit(0, &work->work.pending); + clear_bit(WORK_STRUCT_PENDING, &work->work.management); return ret; } diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 1e9d61ecf762..967479756511 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -80,6 +80,29 @@ static inline int is_single_threaded(struct workqueue_struct *wq) return list_empty(&wq->list); } +static inline void set_wq_data(struct work_struct *work, void *wq) +{ + unsigned long new, old, res; + + /* assume the pending flag is already set and that the task has already + * been queued on this workqueue */ + new = (unsigned long) wq | (1UL << WORK_STRUCT_PENDING); + res = work->management; + if (res != new) { + do { + old = res; + new = (unsigned long) wq; + new |= (old & WORK_STRUCT_FLAG_MASK); + res = cmpxchg(&work->management, old, new); + } while (res != old); + } +} + +static inline void *get_wq_data(struct work_struct *work) +{ + return (void *) (work->management & WORK_STRUCT_WQ_DATA_MASK); +} + /* Preempt must be disabled. */ static void __queue_work(struct cpu_workqueue_struct *cwq, struct work_struct *work) @@ -87,7 +110,7 @@ static void __queue_work(struct cpu_workqueue_struct *cwq, unsigned long flags; spin_lock_irqsave(&cwq->lock, flags); - work->wq_data = cwq; + set_wq_data(work, cwq); list_add_tail(&work->entry, &cwq->worklist); cwq->insert_sequence++; wake_up(&cwq->more_work); @@ -108,7 +131,7 @@ int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work) { int ret = 0, cpu = get_cpu(); - if (!test_and_set_bit(0, &work->pending)) { + if (!test_and_set_bit(WORK_STRUCT_PENDING, &work->management)) { if (unlikely(is_single_threaded(wq))) cpu = singlethread_cpu; BUG_ON(!list_empty(&work->entry)); @@ -123,7 +146,7 @@ EXPORT_SYMBOL_GPL(queue_work); static void delayed_work_timer_fn(unsigned long __data) { struct delayed_work *dwork = (struct delayed_work *)__data; - struct workqueue_struct *wq = dwork->work.wq_data; + struct workqueue_struct *wq = get_wq_data(&dwork->work); int cpu = smp_processor_id(); if (unlikely(is_single_threaded(wq))) @@ -150,12 +173,12 @@ int fastcall queue_delayed_work(struct workqueue_struct *wq, if (delay == 0) return queue_work(wq, work); - if (!test_and_set_bit(0, &work->pending)) { + if (!test_and_set_bit(WORK_STRUCT_PENDING, &work->management)) { BUG_ON(timer_pending(timer)); BUG_ON(!list_empty(&work->entry)); /* This stores wq for the moment, for the timer_fn */ - work->wq_data = wq; + set_wq_data(work, wq); timer->expires = jiffies + delay; timer->data = (unsigned long)dwork; timer->function = delayed_work_timer_fn; @@ -182,12 +205,12 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, struct timer_list *timer = &dwork->timer; struct work_struct *work = &dwork->work; - if (!test_and_set_bit(0, &work->pending)) { + if (!test_and_set_bit(WORK_STRUCT_PENDING, 
&work->management)) { BUG_ON(timer_pending(timer)); BUG_ON(!list_empty(&work->entry)); /* This stores wq for the moment, for the timer_fn */ - work->wq_data = wq; + set_wq_data(work, wq); timer->expires = jiffies + delay; timer->data = (unsigned long)dwork; timer->function = delayed_work_timer_fn; @@ -223,8 +246,8 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq) list_del_init(cwq->worklist.next); spin_unlock_irqrestore(&cwq->lock, flags); - BUG_ON(work->wq_data != cwq); - clear_bit(0, &work->pending); + BUG_ON(get_wq_data(work) != cwq); + clear_bit(WORK_STRUCT_PENDING, &work->management); f(data); spin_lock_irqsave(&cwq->lock, flags); -- cgit v1.2.3 From 65f27f38446e1976cc98fd3004b110fedcddd189 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 22 Nov 2006 14:55:48 +0000 Subject: WorkStruct: Pass the work_struct pointer instead of context data Pass the work_struct pointer to the work function rather than context data. The work function can use container_of() to work out the data. For the cases where the container of the work_struct may go away the moment the pending bit is cleared, it is made possible to defer the release of the structure by deferring the clearing of the pending bit. To make this work, an extra flag is introduced into the management side of the work_struct. This governs auto-release of the structure upon execution. Ordinarily, the work queue executor would release the work_struct for further scheduling or deallocation by clearing the pending bit prior to jumping to the work function. This means that, unless the driver makes some guarantee itself that the work_struct won't go away, the work function may not access anything else in the work_struct or its container lest they be deallocated.. This is a problem if the auxiliary data is taken away (as done by the last patch). However, if the pending bit is *not* cleared before jumping to the work function, then the work function *may* access the work_struct and its container with no problems. But then the work function must itself release the work_struct by calling work_release(). In most cases, automatic release is fine, so this is the default. Special initiators exist for the non-auto-release case (ending in _NAR). 
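To illustrate the converted calling convention (all identifiers below are made up for the example and do not appear in this patch): the context now lives in the structure that embeds the work_struct, the handler receives the work_struct pointer and recovers its container with container_of(), and INIT_WORK() loses its data argument.

	/* Hypothetical example -- not part of this patch. */
	struct my_device {
		struct work_struct reset_work;	/* embedded work item (auto-release) */
		int port;
	};

	static void my_device_reset(struct work_struct *work)
	{
		struct my_device *dev =
			container_of(work, struct my_device, reset_work);

		/* With the default (auto-release) initialiser the pending bit is
		 * already cleared here, so the caller must ensure 'dev' cannot be
		 * freed while the work item may still be queued or running. */
		my_device_hw_reset(dev->port);	/* hypothetical helper */
	}

	static void my_device_setup(struct my_device *dev)
	{
		INIT_WORK(&dev->reset_work, my_device_reset);
	}
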
Signed-Off-By: David Howells --- arch/x86_64/kernel/mce.c | 6 +-- arch/x86_64/kernel/smpboot.c | 12 +++-- arch/x86_64/kernel/time.c | 4 +- block/as-iosched.c | 7 +-- block/cfq-iosched.c | 8 +-- block/ll_rw_blk.c | 8 +-- crypto/cryptomgr.c | 7 +-- drivers/acpi/osl.c | 25 +++------- drivers/ata/libata-core.c | 20 ++++---- drivers/ata/libata-scsi.c | 14 +++--- drivers/ata/libata.h | 4 +- drivers/block/floppy.c | 6 +-- drivers/char/random.c | 6 +-- drivers/char/sysrq.c | 4 +- drivers/char/tty_io.c | 31 ++++++------ drivers/char/vt.c | 6 +-- drivers/cpufreq/cpufreq.c | 10 ++-- drivers/input/keyboard/atkbd.c | 6 +-- drivers/input/serio/libps2.c | 6 +-- drivers/net/e1000/e1000_main.c | 10 ++-- drivers/pci/pcie/aer/aerdrv.c | 2 +- drivers/pci/pcie/aer/aerdrv.h | 2 +- drivers/pci/pcie/aer/aerdrv_core.c | 8 +-- drivers/scsi/scsi_scan.c | 7 +-- drivers/scsi/scsi_sysfs.c | 10 ++-- fs/aio.c | 14 +++--- fs/bio.c | 6 +-- fs/file.c | 6 ++- fs/nfs/client.c | 2 +- fs/nfs/namespace.c | 9 ++-- fs/nfs/nfs4_fs.h | 2 +- fs/nfs/nfs4renewd.c | 5 +- include/linux/libata.h | 3 +- include/linux/workqueue.h | 99 +++++++++++++++++++++++++++++--------- include/net/inet_timewait_sock.h | 2 +- ipc/util.c | 7 ++- kernel/kmod.c | 16 +++--- kernel/kthread.c | 13 +++-- kernel/power/poweroff.c | 4 +- kernel/sys.c | 4 +- kernel/workqueue.c | 19 +++----- mm/slab.c | 6 +-- net/core/link_watch.c | 6 +-- net/ipv4/inet_timewait_sock.c | 5 +- net/ipv4/tcp_minisocks.c | 3 +- net/sunrpc/cache.c | 6 +-- net/sunrpc/rpc_pipe.c | 7 +-- net/sunrpc/sched.c | 8 +-- net/sunrpc/xprt.c | 7 +-- net/sunrpc/xprtsock.c | 18 ++++--- security/keys/key.c | 6 +-- 51 files changed, 293 insertions(+), 219 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index 5306f2630905..c7587fc39015 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c @@ -306,8 +306,8 @@ void mce_log_therm_throt_event(unsigned int cpu, __u64 status) */ static int check_interval = 5 * 60; /* 5 minutes */ -static void mcheck_timer(void *data); -static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer, NULL); +static void mcheck_timer(struct work_struct *work); +static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer); static void mcheck_check_cpu(void *info) { @@ -315,7 +315,7 @@ static void mcheck_check_cpu(void *info) do_machine_check(NULL, 0); } -static void mcheck_timer(void *data) +static void mcheck_timer(struct work_struct *work) { on_each_cpu(mcheck_check_cpu, NULL, 1, 1); schedule_delayed_work(&mcheck_work, check_interval * HZ); diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 62c2e747af58..9800147c4c68 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -753,14 +753,16 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta } struct create_idle { + struct work_struct work; struct task_struct *idle; struct completion done; int cpu; }; -void do_fork_idle(void *_c_idle) +void do_fork_idle(struct work_struct *work) { - struct create_idle *c_idle = _c_idle; + struct create_idle *c_idle = + container_of(work, struct create_idle, work); c_idle->idle = fork_idle(c_idle->cpu); complete(&c_idle->done); @@ -775,10 +777,10 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid) int timeout; unsigned long start_rip; struct create_idle c_idle = { + .work = __WORK_INITIALIZER(c_idle.work, do_fork_idle), .cpu = cpu, .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), }; - DECLARE_WORK(work, do_fork_idle, &c_idle); /* allocate 
memory for gdts of secondary cpus. Hotplug is considered */ if (!cpu_gdt_descr[cpu].address && @@ -825,9 +827,9 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid) * thread. */ if (!keventd_up() || current_is_keventd()) - work.func(work.data); + c_idle.work.func(&c_idle.work); else { - schedule_work(&work); + schedule_work(&c_idle.work); wait_for_completion(&c_idle.done); } diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index e3ef544d2cfb..9f05bc9b2dad 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -563,7 +563,7 @@ static unsigned int cpufreq_delayed_issched = 0; static unsigned int cpufreq_init = 0; static struct work_struct cpufreq_delayed_get_work; -static void handle_cpufreq_delayed_get(void *v) +static void handle_cpufreq_delayed_get(struct work_struct *v) { unsigned int cpu; for_each_online_cpu(cpu) { @@ -639,7 +639,7 @@ static struct notifier_block time_cpufreq_notifier_block = { static int __init cpufreq_tsc(void) { - INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL); + INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get); if (!cpufreq_register_notifier(&time_cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER)) cpufreq_init = 1; diff --git a/block/as-iosched.c b/block/as-iosched.c index 50b95e4c1425..f371c9359999 100644 --- a/block/as-iosched.c +++ b/block/as-iosched.c @@ -1274,9 +1274,10 @@ static void as_merged_requests(request_queue_t *q, struct request *req, * * FIXME! dispatch queue is not a queue at all! */ -static void as_work_handler(void *data) +static void as_work_handler(struct work_struct *work) { - struct request_queue *q = data; + struct as_data *ad = container_of(work, struct as_data, antic_work); + struct request_queue *q = ad->q; unsigned long flags; spin_lock_irqsave(q->queue_lock, flags); @@ -1332,7 +1333,7 @@ static void *as_init_queue(request_queue_t *q, elevator_t *e) ad->antic_timer.function = as_antic_timeout; ad->antic_timer.data = (unsigned long)q; init_timer(&ad->antic_timer); - INIT_WORK(&ad->antic_work, as_work_handler, q); + INIT_WORK(&ad->antic_work, as_work_handler); INIT_LIST_HEAD(&ad->fifo_list[REQ_SYNC]); INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 1d9c3c70a9a0..6cec3a1dccb8 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1841,9 +1841,11 @@ queue_fail: return 1; } -static void cfq_kick_queue(void *data) +static void cfq_kick_queue(struct work_struct *work) { - request_queue_t *q = data; + struct cfq_data *cfqd = + container_of(work, struct cfq_data, unplug_work); + request_queue_t *q = cfqd->queue; unsigned long flags; spin_lock_irqsave(q->queue_lock, flags); @@ -1987,7 +1989,7 @@ static void *cfq_init_queue(request_queue_t *q, elevator_t *e) cfqd->idle_class_timer.function = cfq_idle_class_timer; cfqd->idle_class_timer.data = (unsigned long) cfqd; - INIT_WORK(&cfqd->unplug_work, cfq_kick_queue, q); + INIT_WORK(&cfqd->unplug_work, cfq_kick_queue); cfqd->cfq_quantum = cfq_quantum; cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0]; diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 9eaee6640535..eb4cf6df7374 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -34,7 +34,7 @@ */ #include -static void blk_unplug_work(void *data); +static void blk_unplug_work(struct work_struct *work); static void blk_unplug_timeout(unsigned long data); static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io); static void init_request_from_bio(struct request *req, struct bio *bio); @@ 
-227,7 +227,7 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) if (q->unplug_delay == 0) q->unplug_delay = 1; - INIT_WORK(&q->unplug_work, blk_unplug_work, q); + INIT_WORK(&q->unplug_work, blk_unplug_work); q->unplug_timer.function = blk_unplug_timeout; q->unplug_timer.data = (unsigned long)q; @@ -1631,9 +1631,9 @@ static void blk_backing_dev_unplug(struct backing_dev_info *bdi, } } -static void blk_unplug_work(void *data) +static void blk_unplug_work(struct work_struct *work) { - request_queue_t *q = data; + request_queue_t *q = container_of(work, request_queue_t, unplug_work); blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, q->rq.count[READ] + q->rq.count[WRITE]); diff --git a/crypto/cryptomgr.c b/crypto/cryptomgr.c index 9b5b15601068..2ebffb84f1d9 100644 --- a/crypto/cryptomgr.c +++ b/crypto/cryptomgr.c @@ -40,9 +40,10 @@ struct cryptomgr_param { char template[CRYPTO_MAX_ALG_NAME]; }; -static void cryptomgr_probe(void *data) +static void cryptomgr_probe(struct work_struct *work) { - struct cryptomgr_param *param = data; + struct cryptomgr_param *param = + container_of(work, struct cryptomgr_param, work); struct crypto_template *tmpl; struct crypto_instance *inst; int err; @@ -112,7 +113,7 @@ static int cryptomgr_schedule_probe(struct crypto_larval *larval) param->larval.type = larval->alg.cra_flags; param->larval.mask = larval->mask; - INIT_WORK(¶m->work, cryptomgr_probe, param); + INIT_WORK(¶m->work, cryptomgr_probe); schedule_work(¶m->work); return NOTIFY_STOP; diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 068fe4f100b0..02b30ae6a68e 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -50,6 +50,7 @@ ACPI_MODULE_NAME("osl") struct acpi_os_dpc { acpi_osd_exec_callback function; void *context; + struct work_struct work; }; #ifdef CONFIG_ACPI_CUSTOM_DSDT @@ -564,12 +565,9 @@ void acpi_os_derive_pci_id(acpi_handle rhandle, /* upper bound */ acpi_os_derive_pci_id_2(rhandle, chandle, id, &is_bridge, &bus_number); } -static void acpi_os_execute_deferred(void *context) +static void acpi_os_execute_deferred(struct work_struct *work) { - struct acpi_os_dpc *dpc = NULL; - - - dpc = (struct acpi_os_dpc *)context; + struct acpi_os_dpc *dpc = container_of(work, struct acpi_os_dpc, work); if (!dpc) { printk(KERN_ERR PREFIX "Invalid (NULL) context\n"); return; @@ -602,7 +600,6 @@ acpi_status acpi_os_execute(acpi_execute_type type, { acpi_status status = AE_OK; struct acpi_os_dpc *dpc; - struct work_struct *task; ACPI_FUNCTION_TRACE("os_queue_for_execution"); @@ -615,28 +612,22 @@ acpi_status acpi_os_execute(acpi_execute_type type, /* * Allocate/initialize DPC structure. Note that this memory will be - * freed by the callee. The kernel handles the tq_struct list in a + * freed by the callee. The kernel handles the work_struct list in a * way that allows us to also free its memory inside the callee. * Because we may want to schedule several tasks with different * parameters we can't use the approach some kernel code uses of - * having a static tq_struct. - * We can save time and code by allocating the DPC and tq_structs - * from the same memory. + * having a static work_struct. 
*/ - dpc = - kmalloc(sizeof(struct acpi_os_dpc) + sizeof(struct work_struct), - GFP_ATOMIC); + dpc = kmalloc(sizeof(struct acpi_os_dpc), GFP_ATOMIC); if (!dpc) return_ACPI_STATUS(AE_NO_MEMORY); dpc->function = function; dpc->context = context; - task = (void *)(dpc + 1); - INIT_WORK(task, acpi_os_execute_deferred, (void *)dpc); - - if (!queue_work(kacpid_wq, task)) { + INIT_WORK(&dpc->work, acpi_os_execute_deferred); + if (!queue_work(kacpid_wq, &dpc->work)) { ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Call to queue_work() failed.\n")); kfree(dpc); diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 0bb4b4dced76..b5f2da6ac80e 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -914,7 +914,7 @@ static unsigned int ata_id_xfermask(const u16 *id) * ata_port_queue_task - Queue port_task * @ap: The ata_port to queue port_task for * @fn: workqueue function to be scheduled - * @data: data value to pass to workqueue function + * @data: data for @fn to use * @delay: delay time for workqueue function * * Schedule @fn(@data) for execution after @delay jiffies using @@ -929,7 +929,7 @@ static unsigned int ata_id_xfermask(const u16 *id) * LOCKING: * Inherited from caller. */ -void ata_port_queue_task(struct ata_port *ap, void (*fn)(void *), void *data, +void ata_port_queue_task(struct ata_port *ap, work_func_t fn, void *data, unsigned long delay) { int rc; @@ -937,7 +937,8 @@ void ata_port_queue_task(struct ata_port *ap, void (*fn)(void *), void *data, if (ap->pflags & ATA_PFLAG_FLUSH_PORT_TASK) return; - PREPARE_DELAYED_WORK(&ap->port_task, fn, data); + PREPARE_DELAYED_WORK(&ap->port_task, fn); + ap->port_task_data = data; rc = queue_delayed_work(ata_wq, &ap->port_task, delay); @@ -4292,10 +4293,11 @@ fsm_start: return poll_next; } -static void ata_pio_task(void *_data) +static void ata_pio_task(struct work_struct *work) { - struct ata_queued_cmd *qc = _data; - struct ata_port *ap = qc->ap; + struct ata_port *ap = + container_of(work, struct ata_port, port_task.work); + struct ata_queued_cmd *qc = ap->port_task_data; u8 status; int poll_next; @@ -5317,9 +5319,9 @@ void ata_port_init(struct ata_port *ap, struct ata_host *host, ap->msg_enable = ATA_MSG_DRV | ATA_MSG_ERR | ATA_MSG_WARN; #endif - INIT_DELAYED_WORK(&ap->port_task, NULL, NULL); - INIT_DELAYED_WORK(&ap->hotplug_task, ata_scsi_hotplug, ap); - INIT_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan, ap); + INIT_DELAYED_WORK(&ap->port_task, NULL); + INIT_DELAYED_WORK(&ap->hotplug_task, ata_scsi_hotplug); + INIT_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan); INIT_LIST_HEAD(&ap->eh_done_q); init_waitqueue_head(&ap->eh_wait_q); diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 5c1fc467fc7f..c872b324dbd3 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -3079,7 +3079,7 @@ static void ata_scsi_remove_dev(struct ata_device *dev) /** * ata_scsi_hotplug - SCSI part of hotplug - * @data: Pointer to ATA port to perform SCSI hotplug on + * @work: Pointer to ATA port to perform SCSI hotplug on * * Perform SCSI part of hotplug. It's executed from a separate * workqueue after EH completes. This is necessary because SCSI @@ -3089,9 +3089,10 @@ static void ata_scsi_remove_dev(struct ata_device *dev) * LOCKING: * Kernel thread context (may sleep). 
*/ -void ata_scsi_hotplug(void *data) +void ata_scsi_hotplug(struct work_struct *work) { - struct ata_port *ap = data; + struct ata_port *ap = + container_of(work, struct ata_port, hotplug_task.work); int i; if (ap->pflags & ATA_PFLAG_UNLOADING) { @@ -3190,7 +3191,7 @@ static int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel, /** * ata_scsi_dev_rescan - initiate scsi_rescan_device() - * @data: Pointer to ATA port to perform scsi_rescan_device() + * @work: Pointer to ATA port to perform scsi_rescan_device() * * After ATA pass thru (SAT) commands are executed successfully, * libata need to propagate the changes to SCSI layer. This @@ -3200,9 +3201,10 @@ static int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel, * LOCKING: * Kernel thread context (may sleep). */ -void ata_scsi_dev_rescan(void *data) +void ata_scsi_dev_rescan(struct work_struct *work) { - struct ata_port *ap = data; + struct ata_port *ap = + container_of(work, struct ata_port, scsi_rescan_task); struct ata_device *dev; unsigned int i; diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h index 0ed263be652a..7e0f3aff873d 100644 --- a/drivers/ata/libata.h +++ b/drivers/ata/libata.h @@ -81,7 +81,7 @@ extern struct scsi_transport_template ata_scsi_transport_template; extern void ata_scsi_scan_host(struct ata_port *ap); extern int ata_scsi_offline_dev(struct ata_device *dev); -extern void ata_scsi_hotplug(void *data); +extern void ata_scsi_hotplug(struct work_struct *work); extern unsigned int ata_scsiop_inq_std(struct ata_scsi_args *args, u8 *rbuf, unsigned int buflen); @@ -111,7 +111,7 @@ extern void ata_scsi_rbuf_fill(struct ata_scsi_args *args, unsigned int (*actor) (struct ata_scsi_args *args, u8 *rbuf, unsigned int buflen)); extern void ata_schedule_scsi_eh(struct Scsi_Host *shost); -extern void ata_scsi_dev_rescan(void *data); +extern void ata_scsi_dev_rescan(struct work_struct *work); extern int ata_bus_probe(struct ata_port *ap); /* libata-eh.c */ diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index aa1eb4466f9d..3f1b38276e96 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -992,11 +992,11 @@ static void empty(void) { } -static DECLARE_WORK(floppy_work, NULL, NULL); +static DECLARE_WORK(floppy_work, NULL); static void schedule_bh(void (*handler) (void)) { - PREPARE_WORK(&floppy_work, (work_func_t)handler, NULL); + PREPARE_WORK(&floppy_work, (work_func_t)handler); schedule_work(&floppy_work); } @@ -1008,7 +1008,7 @@ static void cancel_activity(void) spin_lock_irqsave(&floppy_lock, flags); do_floppy = NULL; - PREPARE_WORK(&floppy_work, (work_func_t)empty, NULL); + PREPARE_WORK(&floppy_work, (work_func_t)empty); del_timer(&fd_timer); spin_unlock_irqrestore(&floppy_lock, flags); } diff --git a/drivers/char/random.c b/drivers/char/random.c index f2ab61f3e8ae..fa764688cad1 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1422,9 +1422,9 @@ static struct keydata { static unsigned int ip_cnt; -static void rekey_seq_generator(void *private_); +static void rekey_seq_generator(struct work_struct *work); -static DECLARE_DELAYED_WORK(rekey_work, rekey_seq_generator, NULL); +static DECLARE_DELAYED_WORK(rekey_work, rekey_seq_generator); /* * Lock avoidance: @@ -1438,7 +1438,7 @@ static DECLARE_DELAYED_WORK(rekey_work, rekey_seq_generator, NULL); * happen, and even if that happens only a not perfectly compliant * ISN is generated, nothing fatal. 
*/ -static void rekey_seq_generator(void *private_) +static void rekey_seq_generator(struct work_struct *work) { struct keydata *keyptr = &ip_keydata[1 ^ (ip_cnt & 1)]; diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index 5f49280779fb..c64f5bcff947 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -219,13 +219,13 @@ static struct sysrq_key_op sysrq_term_op = { .enable_mask = SYSRQ_ENABLE_SIGNAL, }; -static void moom_callback(void *ignored) +static void moom_callback(struct work_struct *ignored) { out_of_memory(&NODE_DATA(0)->node_zonelists[ZONE_NORMAL], GFP_KERNEL, 0); } -static DECLARE_WORK(moom_work, moom_callback, NULL); +static DECLARE_WORK(moom_work, moom_callback); static void sysrq_handle_moom(int key, struct tty_struct *tty) { diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 7297acfe520c..83e9e7d9b58c 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -1254,7 +1254,7 @@ EXPORT_SYMBOL_GPL(tty_ldisc_flush); /** * do_tty_hangup - actual handler for hangup events - * @data: tty device + * @work: tty device * * This can be called by the "eventd" kernel thread. That is process * synchronous but doesn't hold any locks, so we need to make sure we @@ -1274,9 +1274,10 @@ EXPORT_SYMBOL_GPL(tty_ldisc_flush); * tasklist_lock to walk task list for hangup event * */ -static void do_tty_hangup(void *data) +static void do_tty_hangup(struct work_struct *work) { - struct tty_struct *tty = (struct tty_struct *) data; + struct tty_struct *tty = + container_of(work, struct tty_struct, hangup_work); struct file * cons_filp = NULL; struct file *filp, *f = NULL; struct task_struct *p; @@ -1433,7 +1434,7 @@ void tty_vhangup(struct tty_struct * tty) printk(KERN_DEBUG "%s vhangup...\n", tty_name(tty, buf)); #endif - do_tty_hangup((void *) tty); + do_tty_hangup(&tty->hangup_work); } EXPORT_SYMBOL(tty_vhangup); @@ -3304,12 +3305,13 @@ int tty_ioctl(struct inode * inode, struct file * file, * Nasty bug: do_SAK is being called in interrupt context. This can * deadlock. We punt it up to process context. AKPM - 16Mar2001 */ -static void __do_SAK(void *arg) +static void __do_SAK(struct work_struct *work) { + struct tty_struct *tty = + container_of(work, struct tty_struct, SAK_work); #ifdef TTY_SOFT_SAK tty_hangup(tty); #else - struct tty_struct *tty = arg; struct task_struct *g, *p; int session; int i; @@ -3388,7 +3390,7 @@ void do_SAK(struct tty_struct *tty) { if (!tty) return; - PREPARE_WORK(&tty->SAK_work, __do_SAK, tty); + PREPARE_WORK(&tty->SAK_work, __do_SAK); schedule_work(&tty->SAK_work); } @@ -3396,7 +3398,7 @@ EXPORT_SYMBOL(do_SAK); /** * flush_to_ldisc - * @private_: tty structure passed from work queue. + * @work: tty structure passed from work queue. * * This routine is called out of the software interrupt to flush data * from the buffer chain to the line discipline. @@ -3406,9 +3408,10 @@ EXPORT_SYMBOL(do_SAK); * receive_buf method is single threaded for each tty instance. 
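All of the conversions in these hunks follow one mechanical recipe: the handler loses its void *data argument, receives the struct work_struct itself, and recovers its per-object context with container_of() on the structure that embeds the work item, while the trailing data argument disappears from INIT_WORK()/DECLARE_WORK(). A minimal sketch of that recipe, using a made-up struct foo rather than any of the drivers touched here:

#include <linux/workqueue.h>
#include <linux/slab.h>

struct foo {
	int pending_events;
	struct work_struct work;	/* embedded; no separate data pointer */
};

/* new-style handler: context comes from the embedding structure */
static void foo_work_handler(struct work_struct *work)
{
	struct foo *f = container_of(work, struct foo, work);

	f->pending_events = 0;
}

static struct foo *foo_create(void)
{
	struct foo *f = kzalloc(sizeof(*f), GFP_KERNEL);

	if (f)
		INIT_WORK(&f->work, foo_work_handler);	/* no data argument any more */
	return f;
}

Queueing is unchanged: schedule_work(&f->work) or queue_work(wq, &f->work) exactly as before.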
*/ -static void flush_to_ldisc(void *private_) +static void flush_to_ldisc(struct work_struct *work) { - struct tty_struct *tty = (struct tty_struct *) private_; + struct tty_struct *tty = + container_of(work, struct tty_struct, buf.work.work); unsigned long flags; struct tty_ldisc *disc; struct tty_buffer *tbuf, *head; @@ -3553,7 +3556,7 @@ void tty_flip_buffer_push(struct tty_struct *tty) spin_unlock_irqrestore(&tty->buf.lock, flags); if (tty->low_latency) - flush_to_ldisc((void *) tty); + flush_to_ldisc(&tty->buf.work.work); else schedule_delayed_work(&tty->buf.work, 1); } @@ -3580,17 +3583,17 @@ static void initialize_tty_struct(struct tty_struct *tty) tty->overrun_time = jiffies; tty->buf.head = tty->buf.tail = NULL; tty_buffer_init(tty); - INIT_DELAYED_WORK(&tty->buf.work, flush_to_ldisc, tty); + INIT_DELAYED_WORK(&tty->buf.work, flush_to_ldisc); init_MUTEX(&tty->buf.pty_sem); mutex_init(&tty->termios_mutex); init_waitqueue_head(&tty->write_wait); init_waitqueue_head(&tty->read_wait); - INIT_WORK(&tty->hangup_work, do_tty_hangup, tty); + INIT_WORK(&tty->hangup_work, do_tty_hangup); mutex_init(&tty->atomic_read_lock); mutex_init(&tty->atomic_write_lock); spin_lock_init(&tty->read_lock); INIT_LIST_HEAD(&tty->tty_files); - INIT_WORK(&tty->SAK_work, NULL, NULL); + INIT_WORK(&tty->SAK_work, NULL); } /* diff --git a/drivers/char/vt.c b/drivers/char/vt.c index 8e4413f6fbaf..8ee04adc37f0 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c @@ -155,7 +155,7 @@ static void con_flush_chars(struct tty_struct *tty); static void set_vesa_blanking(char __user *p); static void set_cursor(struct vc_data *vc); static void hide_cursor(struct vc_data *vc); -static void console_callback(void *ignored); +static void console_callback(struct work_struct *ignored); static void blank_screen_t(unsigned long dummy); static void set_palette(struct vc_data *vc); @@ -174,7 +174,7 @@ static int vesa_blank_mode; /* 0:none 1:suspendV 2:suspendH 3:powerdown */ static int blankinterval = 10*60*HZ; static int vesa_off_interval; -static DECLARE_WORK(console_work, console_callback, NULL); +static DECLARE_WORK(console_work, console_callback); /* * fg_console is the current virtual console, @@ -2154,7 +2154,7 @@ out: * with other console code and prevention of re-entrancy is * ensured with console_sem. */ -static void console_callback(void *ignored) +static void console_callback(struct work_struct *ignored) { acquire_console_sem(); diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index dd0c2623e27b..7a7c6e6dfe4f 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -42,7 +42,7 @@ static DEFINE_SPINLOCK(cpufreq_driver_lock); /* internal prototypes */ static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event); -static void handle_update(void *data); +static void handle_update(struct work_struct *work); /** * Two notifier lists: the "policy" list is involved in the @@ -665,7 +665,7 @@ static int cpufreq_add_dev (struct sys_device * sys_dev) mutex_init(&policy->lock); mutex_lock(&policy->lock); init_completion(&policy->kobj_unregister); - INIT_WORK(&policy->update, handle_update, (void *)(long)cpu); + INIT_WORK(&policy->update, handle_update); /* call driver. 
From then on the cpufreq must be able * to accept all calls to ->verify and ->setpolicy for this CPU @@ -895,9 +895,11 @@ static int cpufreq_remove_dev (struct sys_device * sys_dev) } -static void handle_update(void *data) +static void handle_update(struct work_struct *work) { - unsigned int cpu = (unsigned int)(long)data; + struct cpufreq_policy *policy = + container_of(work, struct cpufreq_policy, update); + unsigned int cpu = policy->cpu; dprintk("handle_update for cpu %u called\n", cpu); cpufreq_update_policy(cpu); } diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c index cbb93669d1ce..8451b29a3db5 100644 --- a/drivers/input/keyboard/atkbd.c +++ b/drivers/input/keyboard/atkbd.c @@ -567,9 +567,9 @@ static int atkbd_set_leds(struct atkbd *atkbd) * interrupt context. */ -static void atkbd_event_work(void *data) +static void atkbd_event_work(struct work_struct *work) { - struct atkbd *atkbd = data; + struct atkbd *atkbd = container_of(work, struct atkbd, event_work); mutex_lock(&atkbd->event_mutex); @@ -943,7 +943,7 @@ static int atkbd_connect(struct serio *serio, struct serio_driver *drv) atkbd->dev = dev; ps2_init(&atkbd->ps2dev, serio); - INIT_WORK(&atkbd->event_work, atkbd_event_work, atkbd); + INIT_WORK(&atkbd->event_work, atkbd_event_work); mutex_init(&atkbd->event_mutex); switch (serio->id.type) { diff --git a/drivers/input/serio/libps2.c b/drivers/input/serio/libps2.c index e5b1b60757bb..b3e84d3bb7f7 100644 --- a/drivers/input/serio/libps2.c +++ b/drivers/input/serio/libps2.c @@ -251,9 +251,9 @@ EXPORT_SYMBOL(ps2_command); * ps2_schedule_command(), to a PS/2 device (keyboard, mouse, etc.) */ -static void ps2_execute_scheduled_command(void *data) +static void ps2_execute_scheduled_command(struct work_struct *work) { - struct ps2work *ps2work = data; + struct ps2work *ps2work = container_of(work, struct ps2work, work); ps2_command(ps2work->ps2dev, ps2work->param, ps2work->command); kfree(ps2work); @@ -278,7 +278,7 @@ int ps2_schedule_command(struct ps2dev *ps2dev, unsigned char *param, int comman ps2work->ps2dev = ps2dev; ps2work->command = command; memcpy(ps2work->param, param, send); - INIT_WORK(&ps2work->work, ps2_execute_scheduled_command, ps2work); + INIT_WORK(&ps2work->work, ps2_execute_scheduled_command); if (!schedule_work(&ps2work->work)) { kfree(ps2work); diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index 726ec5e88ab2..03294400bc90 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -183,7 +183,7 @@ void e1000_set_ethtool_ops(struct net_device *netdev); static void e1000_enter_82542_rst(struct e1000_adapter *adapter); static void e1000_leave_82542_rst(struct e1000_adapter *adapter); static void e1000_tx_timeout(struct net_device *dev); -static void e1000_reset_task(struct net_device *dev); +static void e1000_reset_task(struct work_struct *work); static void e1000_smartspeed(struct e1000_adapter *adapter); static int e1000_82547_fifo_workaround(struct e1000_adapter *adapter, struct sk_buff *skb); @@ -908,8 +908,7 @@ e1000_probe(struct pci_dev *pdev, adapter->phy_info_timer.function = &e1000_update_phy_info; adapter->phy_info_timer.data = (unsigned long) adapter; - INIT_WORK(&adapter->reset_task, - (void (*)(void *))e1000_reset_task, netdev); + INIT_WORK(&adapter->reset_task, e1000_reset_task); e1000_check_options(adapter); @@ -3154,9 +3153,10 @@ e1000_tx_timeout(struct net_device *netdev) } static void -e1000_reset_task(struct net_device *netdev) +e1000_reset_task(struct work_struct 
*work) { - struct e1000_adapter *adapter = netdev_priv(netdev); + struct e1000_adapter *adapter = + container_of(work, struct e1000_adapter, reset_task); e1000_reinit_locked(adapter); } diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index 04c43ef529ac..55866b6b26fa 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -160,7 +160,7 @@ static struct aer_rpc* aer_alloc_rpc(struct pcie_device *dev) rpc->e_lock = SPIN_LOCK_UNLOCKED; rpc->rpd = dev; - INIT_WORK(&rpc->dpc_handler, aer_isr, (void *)dev); + INIT_WORK(&rpc->dpc_handler, aer_isr); rpc->prod_idx = rpc->cons_idx = 0; mutex_init(&rpc->rpc_mutex); init_waitqueue_head(&rpc->wait_release); diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h index daf0cad88fc8..3c0a58f64dd8 100644 --- a/drivers/pci/pcie/aer/aerdrv.h +++ b/drivers/pci/pcie/aer/aerdrv.h @@ -118,7 +118,7 @@ extern struct bus_type pcie_port_bus_type; extern void aer_enable_rootport(struct aer_rpc *rpc); extern void aer_delete_rootport(struct aer_rpc *rpc); extern int aer_init(struct pcie_device *dev); -extern void aer_isr(void *context); +extern void aer_isr(struct work_struct *work); extern void aer_print_error(struct pci_dev *dev, struct aer_err_info *info); extern int aer_osc_setup(struct pci_dev *dev); diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 1c7e660d6535..08e13033ced8 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -690,14 +690,14 @@ static void aer_isr_one_error(struct pcie_device *p_device, /** * aer_isr - consume errors detected by root port - * @context: pointer to a private data of pcie device + * @work: definition of this work item * * Invoked, as DPC, when root port records new detected error **/ -void aer_isr(void *context) +void aer_isr(struct work_struct *work) { - struct pcie_device *p_device = (struct pcie_device *) context; - struct aer_rpc *rpc = get_service_data(p_device); + struct aer_rpc *rpc = container_of(work, struct aer_rpc, dpc_handler); + struct pcie_device *p_device = rpc->rpd; struct aer_err_source *e_src; mutex_lock(&rpc->rpc_mutex); diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 94a274645f6f..d3c5e964c964 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -362,9 +362,10 @@ static struct scsi_target *scsi_alloc_target(struct device *parent, goto retry; } -static void scsi_target_reap_usercontext(void *data) +static void scsi_target_reap_usercontext(struct work_struct *work) { - struct scsi_target *starget = data; + struct scsi_target *starget = + container_of(work, struct scsi_target, ew.work); struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); unsigned long flags; @@ -400,7 +401,7 @@ void scsi_target_reap(struct scsi_target *starget) starget->state = STARGET_DEL; spin_unlock_irqrestore(shost->host_lock, flags); execute_in_process_context(scsi_target_reap_usercontext, - starget, &starget->ew); + &starget->ew); return; } diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index e1a91665d1c2..259c90cfa367 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -218,16 +218,16 @@ static void scsi_device_cls_release(struct class_device *class_dev) put_device(&sdev->sdev_gendev); } -static void scsi_device_dev_release_usercontext(void *data) +static void scsi_device_dev_release_usercontext(struct work_struct *work) { - struct device *dev = data; struct scsi_device *sdev; struct device *parent; 
struct scsi_target *starget; unsigned long flags; - parent = dev->parent; - sdev = to_scsi_device(dev); + sdev = container_of(work, struct scsi_device, ew.work); + + parent = sdev->sdev_gendev.parent; starget = to_scsi_target(parent); spin_lock_irqsave(sdev->host->host_lock, flags); @@ -258,7 +258,7 @@ static void scsi_device_dev_release_usercontext(void *data) static void scsi_device_dev_release(struct device *dev) { struct scsi_device *sdp = to_scsi_device(dev); - execute_in_process_context(scsi_device_dev_release_usercontext, dev, + execute_in_process_context(scsi_device_dev_release_usercontext, &sdp->ew); } diff --git a/fs/aio.c b/fs/aio.c index 11a1a7100ad6..ca1c5180a17f 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -53,13 +53,13 @@ static kmem_cache_t *kioctx_cachep; static struct workqueue_struct *aio_wq; /* Used for rare fput completion. */ -static void aio_fput_routine(void *); -static DECLARE_WORK(fput_work, aio_fput_routine, NULL); +static void aio_fput_routine(struct work_struct *); +static DECLARE_WORK(fput_work, aio_fput_routine); static DEFINE_SPINLOCK(fput_lock); static LIST_HEAD(fput_head); -static void aio_kick_handler(void *); +static void aio_kick_handler(struct work_struct *); static void aio_queue_work(struct kioctx *); /* aio_setup @@ -227,7 +227,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) INIT_LIST_HEAD(&ctx->active_reqs); INIT_LIST_HEAD(&ctx->run_list); - INIT_DELAYED_WORK(&ctx->wq, aio_kick_handler, ctx); + INIT_DELAYED_WORK(&ctx->wq, aio_kick_handler); if (aio_setup_ring(ctx) < 0) goto out_freectx; @@ -470,7 +470,7 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req) wake_up(&ctx->wait); } -static void aio_fput_routine(void *data) +static void aio_fput_routine(struct work_struct *data) { spin_lock_irq(&fput_lock); while (likely(!list_empty(&fput_head))) { @@ -859,9 +859,9 @@ static inline void aio_run_all_iocbs(struct kioctx *ctx) * space. * Run on aiod's context. */ -static void aio_kick_handler(void *data) +static void aio_kick_handler(struct work_struct *work) { - struct kioctx *ctx = data; + struct kioctx *ctx = container_of(work, struct kioctx, wq.work); mm_segment_t oldfs = get_fs(); int requeue; diff --git a/fs/bio.c b/fs/bio.c index f95c8749499f..c6c07ca5b5a9 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -955,16 +955,16 @@ static void bio_release_pages(struct bio *bio) * run one bio_put() against the BIO. 
*/ -static void bio_dirty_fn(void *data); +static void bio_dirty_fn(struct work_struct *work); -static DECLARE_WORK(bio_dirty_work, bio_dirty_fn, NULL); +static DECLARE_WORK(bio_dirty_work, bio_dirty_fn); static DEFINE_SPINLOCK(bio_dirty_lock); static struct bio *bio_dirty_list; /* * This runs in process context */ -static void bio_dirty_fn(void *data) +static void bio_dirty_fn(struct work_struct *work) { unsigned long flags; struct bio *bio; diff --git a/fs/file.c b/fs/file.c index 8e81775c5dc8..3787e82f54c1 100644 --- a/fs/file.c +++ b/fs/file.c @@ -91,8 +91,10 @@ out: spin_unlock(&fddef->lock); } -static void free_fdtable_work(struct fdtable_defer *f) +static void free_fdtable_work(struct work_struct *work) { + struct fdtable_defer *f = + container_of(work, struct fdtable_defer, wq); struct fdtable *fdt; spin_lock_bh(&f->lock); @@ -351,7 +353,7 @@ static void __devinit fdtable_defer_list_init(int cpu) { struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu); spin_lock_init(&fddef->lock); - INIT_WORK(&fddef->wq, (void (*)(void *))free_fdtable_work, fddef); + INIT_WORK(&fddef->wq, free_fdtable_work); init_timer(&fddef->timer); fddef->timer.data = (unsigned long)fddef; fddef->timer.function = fdtable_timer; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 6f0487d6f44a..23ab145daa2d 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -143,7 +143,7 @@ static struct nfs_client *nfs_alloc_client(const char *hostname, INIT_LIST_HEAD(&clp->cl_state_owners); INIT_LIST_HEAD(&clp->cl_unused); spin_lock_init(&clp->cl_lock); - INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state, clp); + INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state); rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client"); clp->cl_boot_time = CURRENT_TIME; clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 5ed798bc1cf7..371b804e7cc8 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -18,11 +18,10 @@ #define NFSDBG_FACILITY NFSDBG_VFS -static void nfs_expire_automounts(void *list); +static void nfs_expire_automounts(struct work_struct *work); LIST_HEAD(nfs_automount_list); -static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts, - &nfs_automount_list); +static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts); int nfs_mountpoint_expiry_timeout = 500 * HZ; static struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent, @@ -165,9 +164,9 @@ struct inode_operations nfs_referral_inode_operations = { .follow_link = nfs_follow_mountpoint, }; -static void nfs_expire_automounts(void *data) +static void nfs_expire_automounts(struct work_struct *work) { - struct list_head *list = (struct list_head *)data; + struct list_head *list = &nfs_automount_list; mark_mounts_for_expiry(list); if (!list_empty(list)) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 6f346677332d..c26cd978c7cc 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -185,7 +185,7 @@ extern const u32 nfs4_fs_locations_bitmap[2]; extern void nfs4_schedule_state_renewal(struct nfs_client *); extern void nfs4_renewd_prepare_shutdown(struct nfs_server *); extern void nfs4_kill_renewd(struct nfs_client *); -extern void nfs4_renew_state(void *); +extern void nfs4_renew_state(struct work_struct *); /* nfs4state.c */ struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp); diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index 7b6df1852e75..823298561c0a 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -59,9 +59,10 @@ #define 
NFSDBG_FACILITY NFSDBG_PROC void -nfs4_renew_state(void *data) +nfs4_renew_state(struct work_struct *work) { - struct nfs_client *clp = (struct nfs_client *)data; + struct nfs_client *clp = + container_of(work, struct nfs_client, cl_renewd.work); struct rpc_cred *cred; long lease, timeout; unsigned long last, now; diff --git a/include/linux/libata.h b/include/linux/libata.h index 5f04006e8dd2..b3f32eadbef5 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -568,6 +568,7 @@ struct ata_port { struct ata_host *host; struct device *dev; + void *port_task_data; struct delayed_work port_task; struct delayed_work hotplug_task; struct work_struct scsi_rescan_task; @@ -747,7 +748,7 @@ extern int ata_ratelimit(void); extern unsigned int ata_busy_sleep(struct ata_port *ap, unsigned long timeout_pat, unsigned long timeout); -extern void ata_port_queue_task(struct ata_port *ap, void (*fn)(void *), +extern void ata_port_queue_task(struct ata_port *ap, work_func_t fn, void *data, unsigned long delay); extern u32 ata_wait_register(void __iomem *reg, u32 mask, u32 val, unsigned long interval_msec, diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index ecc017d24cf3..4a3ea83c6d16 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -11,18 +11,19 @@ struct workqueue_struct; -typedef void (*work_func_t)(void *data); +struct work_struct; +typedef void (*work_func_t)(struct work_struct *work); struct work_struct { - /* the first word is the work queue pointer and the pending flag - * rolled into one */ + /* the first word is the work queue pointer and the flags rolled into + * one */ unsigned long management; #define WORK_STRUCT_PENDING 0 /* T if work item pending execution */ +#define WORK_STRUCT_NOAUTOREL 1 /* F if work item automatically released on exec */ #define WORK_STRUCT_FLAG_MASK (3UL) #define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK) struct list_head entry; work_func_t func; - void *data; }; struct delayed_work { @@ -34,48 +35,77 @@ struct execute_work { struct work_struct work; }; -#define __WORK_INITIALIZER(n, f, d) { \ +#define __WORK_INITIALIZER(n, f) { \ + .management = 0, \ .entry = { &(n).entry, &(n).entry }, \ .func = (f), \ - .data = (d), \ } -#define __DELAYED_WORK_INITIALIZER(n, f, d) { \ - .work = __WORK_INITIALIZER((n).work, (f), (d)), \ +#define __WORK_INITIALIZER_NAR(n, f) { \ + .management = (1 << WORK_STRUCT_NOAUTOREL), \ + .entry = { &(n).entry, &(n).entry }, \ + .func = (f), \ + } + +#define __DELAYED_WORK_INITIALIZER(n, f) { \ + .work = __WORK_INITIALIZER((n).work, (f)), \ + .timer = TIMER_INITIALIZER(NULL, 0, 0), \ + } + +#define __DELAYED_WORK_INITIALIZER_NAR(n, f) { \ + .work = __WORK_INITIALIZER_NAR((n).work, (f)), \ .timer = TIMER_INITIALIZER(NULL, 0, 0), \ } -#define DECLARE_WORK(n, f, d) \ - struct work_struct n = __WORK_INITIALIZER(n, f, d) +#define DECLARE_WORK(n, f) \ + struct work_struct n = __WORK_INITIALIZER(n, f) + +#define DECLARE_WORK_NAR(n, f) \ + struct work_struct n = __WORK_INITIALIZER_NAR(n, f) -#define DECLARE_DELAYED_WORK(n, f, d) \ - struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f, d) +#define DECLARE_DELAYED_WORK(n, f) \ + struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f) + +#define DECLARE_DELAYED_WORK_NAR(n, f) \ + struct dwork_struct n = __DELAYED_WORK_INITIALIZER_NAR(n, f) /* - * initialize a work item's function and data pointers + * initialize a work item's function pointer */ -#define PREPARE_WORK(_work, _func, _data) \ +#define PREPARE_WORK(_work, _func) \ do { \ 
(_work)->func = (_func); \ - (_work)->data = (_data); \ } while (0) -#define PREPARE_DELAYED_WORK(_work, _func, _data) \ - PREPARE_WORK(&(_work)->work, (_func), (_data)) +#define PREPARE_DELAYED_WORK(_work, _func) \ + PREPARE_WORK(&(_work)->work, (_func)) /* * initialize all of a work item in one go */ -#define INIT_WORK(_work, _func, _data) \ +#define INIT_WORK(_work, _func) \ do { \ - INIT_LIST_HEAD(&(_work)->entry); \ (_work)->management = 0; \ - PREPARE_WORK((_work), (_func), (_data)); \ + INIT_LIST_HEAD(&(_work)->entry); \ + PREPARE_WORK((_work), (_func)); \ + } while (0) + +#define INIT_WORK_NAR(_work, _func) \ + do { \ + (_work)->management = (1 << WORK_STRUCT_NOAUTOREL); \ + INIT_LIST_HEAD(&(_work)->entry); \ + PREPARE_WORK((_work), (_func)); \ + } while (0) + +#define INIT_DELAYED_WORK(_work, _func) \ + do { \ + INIT_WORK(&(_work)->work, (_func)); \ + init_timer(&(_work)->timer); \ } while (0) -#define INIT_DELAYED_WORK(_work, _func, _data) \ +#define INIT_DELAYED_WORK_NAR(_work, _func) \ do { \ - INIT_WORK(&(_work)->work, (_func), (_data)); \ + INIT_WORK_NAR(&(_work)->work, (_func)); \ init_timer(&(_work)->timer); \ } while (0) @@ -94,6 +124,27 @@ struct execute_work { #define delayed_work_pending(work) \ test_bit(WORK_STRUCT_PENDING, &(work)->work.management) +/** + * work_release - Release a work item under execution + * @work: The work item to release + * + * This is used to release a work item that has been initialised with automatic + * release mode disabled (WORK_STRUCT_NOAUTOREL is set). This gives the work + * function the opportunity to grab auxiliary data from the container of the + * work_struct before clearing the pending bit as the work_struct may be + * subject to deallocation the moment the pending bit is cleared. + * + * In such a case, this should be called in the work function after it has + * fetched any data it may require from the containter of the work_struct. + * After this function has been called, the work_struct may be scheduled for + * further execution or it may be deallocated unless other precautions are + * taken. + * + * This should also be used to release a delayed work item. + */ +#define work_release(work) \ + clear_bit(WORK_STRUCT_PENDING, &(work)->management) + extern struct workqueue_struct *__create_workqueue(const char *name, int singlethread); @@ -112,7 +163,7 @@ extern int FASTCALL(schedule_work(struct work_struct *work)); extern int FASTCALL(schedule_delayed_work(struct delayed_work *work, unsigned long delay)); extern int schedule_delayed_work_on(int cpu, struct delayed_work *work, unsigned long delay); -extern int schedule_on_each_cpu(work_func_t func, void *info); +extern int schedule_on_each_cpu(work_func_t func); extern void flush_scheduled_work(void); extern int current_is_keventd(void); extern int keventd_up(void); @@ -121,7 +172,7 @@ extern void init_workqueues(void); void cancel_rearming_delayed_work(struct delayed_work *work); void cancel_rearming_delayed_workqueue(struct workqueue_struct *, struct delayed_work *); -int execute_in_process_context(work_func_t fn, void *, struct execute_work *); +int execute_in_process_context(work_func_t fn, struct execute_work *); /* * Kill off a pending schedule_delayed_work(). 
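The _NAR ("no auto-release") initialisers and work_release() above cover items whose container may be freed or requeued the moment the pending bit clears, so the core leaves that bit set and the handler drops it explicitly once it has copied out what it needs. A hedged sketch of such a handler (the structure and the string payload are illustrative, not taken from the patch):

#include <linux/workqueue.h>
#include <linux/string.h>
#include <linux/kernel.h>

struct one_shot {
	char tag[16];
	struct work_struct work;	/* set up with INIT_WORK_NAR() */
};

static void one_shot_handler(struct work_struct *work)
{
	struct one_shot *os = container_of(work, struct one_shot, work);
	char tag[16];

	/* copy out everything needed while the item is still marked pending */
	memcpy(tag, os->tag, sizeof(tag));

	/*
	 * Release it ourselves; from this point the owner may free or
	 * reschedule the containing structure, so only the local copy
	 * is used below.
	 */
	work_release(work);

	printk(KERN_INFO "one-shot work \"%s\" completed\n", tag);
}

The owner initialises such an item with INIT_WORK_NAR(&os->work, one_shot_handler) and queues it with schedule_work() as usual.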
Note that the work callback diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 5f48748fe017..f7be1ac73601 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -84,7 +84,7 @@ struct inet_timewait_death_row { }; extern void inet_twdr_hangman(unsigned long data); -extern void inet_twdr_twkill_work(void *data); +extern void inet_twdr_twkill_work(struct work_struct *work); extern void inet_twdr_twcal_tick(unsigned long data); #if (BITS_PER_LONG == 64) diff --git a/ipc/util.c b/ipc/util.c index cd8bb14a431f..a9b7a227b8d4 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -514,6 +514,11 @@ void ipc_rcu_getref(void *ptr) container_of(ptr, struct ipc_rcu_hdr, data)->refcount++; } +static void ipc_do_vfree(struct work_struct *work) +{ + vfree(container_of(work, struct ipc_rcu_sched, work)); +} + /** * ipc_schedule_free - free ipc + rcu space * @head: RCU callback structure for queued work @@ -528,7 +533,7 @@ static void ipc_schedule_free(struct rcu_head *head) struct ipc_rcu_sched *sched = container_of(&(grace->data[0]), struct ipc_rcu_sched, data[0]); - INIT_WORK(&sched->work, vfree, sched); + INIT_WORK(&sched->work, ipc_do_vfree); schedule_work(&sched->work); } diff --git a/kernel/kmod.c b/kernel/kmod.c index bb4e29d924e4..7dc7a9dad6ac 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -114,6 +114,7 @@ EXPORT_SYMBOL(request_module); #endif /* CONFIG_KMOD */ struct subprocess_info { + struct work_struct work; struct completion *complete; char *path; char **argv; @@ -221,9 +222,10 @@ static int wait_for_helper(void *data) } /* This is run by khelper thread */ -static void __call_usermodehelper(void *data) +static void __call_usermodehelper(struct work_struct *work) { - struct subprocess_info *sub_info = data; + struct subprocess_info *sub_info = + container_of(work, struct subprocess_info, work); pid_t pid; int wait = sub_info->wait; @@ -264,6 +266,8 @@ int call_usermodehelper_keys(char *path, char **argv, char **envp, { DECLARE_COMPLETION_ONSTACK(done); struct subprocess_info sub_info = { + .work = __WORK_INITIALIZER(sub_info.work, + __call_usermodehelper), .complete = &done, .path = path, .argv = argv, @@ -272,7 +276,6 @@ int call_usermodehelper_keys(char *path, char **argv, char **envp, .wait = wait, .retval = 0, }; - DECLARE_WORK(work, __call_usermodehelper, &sub_info); if (!khelper_wq) return -EBUSY; @@ -280,7 +283,7 @@ int call_usermodehelper_keys(char *path, char **argv, char **envp, if (path[0] == '\0') return 0; - queue_work(khelper_wq, &work); + queue_work(khelper_wq, &sub_info.work); wait_for_completion(&done); return sub_info.retval; } @@ -291,6 +294,8 @@ int call_usermodehelper_pipe(char *path, char **argv, char **envp, { DECLARE_COMPLETION(done); struct subprocess_info sub_info = { + .work = __WORK_INITIALIZER(sub_info.work, + __call_usermodehelper), .complete = &done, .path = path, .argv = argv, @@ -298,7 +303,6 @@ int call_usermodehelper_pipe(char *path, char **argv, char **envp, .retval = 0, }; struct file *f; - DECLARE_WORK(work, __call_usermodehelper, &sub_info); if (!khelper_wq) return -EBUSY; @@ -318,7 +322,7 @@ int call_usermodehelper_pipe(char *path, char **argv, char **envp, } sub_info.stdin = f; - queue_work(khelper_wq, &work); + queue_work(khelper_wq, &sub_info.work); wait_for_completion(&done); return sub_info.retval; } diff --git a/kernel/kthread.c b/kernel/kthread.c index 4f9c60ef95e8..1db8c72d0d38 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -31,6 +31,8 @@ struct kthread_create_info /* Result 
passed back to kthread_create() from keventd. */ struct task_struct *result; struct completion done; + + struct work_struct work; }; struct kthread_stop_info @@ -111,9 +113,10 @@ static int kthread(void *_create) } /* We are keventd: create a thread. */ -static void keventd_create_kthread(void *_create) +static void keventd_create_kthread(struct work_struct *work) { - struct kthread_create_info *create = _create; + struct kthread_create_info *create = + container_of(work, struct kthread_create_info, work); int pid; /* We want our own signal handler (we take no signals by default). */ @@ -154,20 +157,20 @@ struct task_struct *kthread_create(int (*threadfn)(void *data), ...) { struct kthread_create_info create; - DECLARE_WORK(work, keventd_create_kthread, &create); create.threadfn = threadfn; create.data = data; init_completion(&create.started); init_completion(&create.done); + INIT_WORK(&create.work, keventd_create_kthread); /* * The workqueue needs to start up first: */ if (!helper_wq) - work.func(work.data); + create.work.func(&create.work); else { - queue_work(helper_wq, &work); + queue_work(helper_wq, &create.work); wait_for_completion(&create.done); } if (!IS_ERR(create.result)) { diff --git a/kernel/power/poweroff.c b/kernel/power/poweroff.c index f1f900ac3164..678ec736076b 100644 --- a/kernel/power/poweroff.c +++ b/kernel/power/poweroff.c @@ -16,12 +16,12 @@ * callback we use. */ -static void do_poweroff(void *dummy) +static void do_poweroff(struct work_struct *dummy) { kernel_power_off(); } -static DECLARE_WORK(poweroff_work, do_poweroff, NULL); +static DECLARE_WORK(poweroff_work, do_poweroff); static void handle_poweroff(int key, struct tty_struct *tty) { diff --git a/kernel/sys.c b/kernel/sys.c index 98489d82801b..c87b461de38d 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -880,7 +880,7 @@ asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user return 0; } -static void deferred_cad(void *dummy) +static void deferred_cad(struct work_struct *dummy) { kernel_restart(NULL); } @@ -892,7 +892,7 @@ static void deferred_cad(void *dummy) */ void ctrl_alt_del(void) { - static DECLARE_WORK(cad_work, deferred_cad, NULL); + static DECLARE_WORK(cad_work, deferred_cad); if (C_A_D) schedule_work(&cad_work); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 967479756511..8d1e7cb8a51a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -241,14 +241,14 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq) struct work_struct *work = list_entry(cwq->worklist.next, struct work_struct, entry); work_func_t f = work->func; - void *data = work->data; list_del_init(cwq->worklist.next); spin_unlock_irqrestore(&cwq->lock, flags); BUG_ON(get_wq_data(work) != cwq); - clear_bit(WORK_STRUCT_PENDING, &work->management); - f(data); + if (!test_bit(WORK_STRUCT_NOAUTOREL, &work->management)) + work_release(work); + f(work); spin_lock_irqsave(&cwq->lock, flags); cwq->remove_sequence++; @@ -527,7 +527,6 @@ EXPORT_SYMBOL(schedule_delayed_work_on); /** * schedule_on_each_cpu - call a function on each online CPU from keventd * @func: the function to call - * @info: a pointer to pass to func() * * Returns zero on success. * Returns -ve errno on failure. @@ -536,7 +535,7 @@ EXPORT_SYMBOL(schedule_delayed_work_on); * * schedule_on_each_cpu() is very slow. 
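In the hunk just below, schedule_on_each_cpu() loses its @info argument as well; a callback now receives only the per-CPU work_struct that the core allocates, so any state it touches has to be reachable some other way, typically per-CPU data, since keventd runs each item on the CPU it was queued for. A hypothetical caller (the counter and the function names are made up for illustration):

#include <linux/workqueue.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, pending_counter);

/* executed once on every online CPU, in keventd context */
static void drain_local_counter(struct work_struct *unused)
{
	unsigned long *cnt = &get_cpu_var(pending_counter);

	*cnt = 0;
	put_cpu_var(pending_counter);
}

static int drain_all_counters(void)
{
	return schedule_on_each_cpu(drain_local_counter);
}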
*/ -int schedule_on_each_cpu(work_func_t func, void *info) +int schedule_on_each_cpu(work_func_t func) { int cpu; struct work_struct *works; @@ -547,7 +546,7 @@ int schedule_on_each_cpu(work_func_t func, void *info) mutex_lock(&workqueue_mutex); for_each_online_cpu(cpu) { - INIT_WORK(per_cpu_ptr(works, cpu), func, info); + INIT_WORK(per_cpu_ptr(works, cpu), func); __queue_work(per_cpu_ptr(keventd_wq->cpu_wq, cpu), per_cpu_ptr(works, cpu)); } @@ -591,7 +590,6 @@ EXPORT_SYMBOL(cancel_rearming_delayed_work); /** * execute_in_process_context - reliably execute the routine with user context * @fn: the function to execute - * @data: data to pass to the function * @ew: guaranteed storage for the execute work structure (must * be available when the work executes) * @@ -601,15 +599,14 @@ EXPORT_SYMBOL(cancel_rearming_delayed_work); * Returns: 0 - function was executed * 1 - function was scheduled for execution */ -int execute_in_process_context(work_func_t fn, void *data, - struct execute_work *ew) +int execute_in_process_context(work_func_t fn, struct execute_work *ew) { if (!in_interrupt()) { - fn(data); + fn(&ew->work); return 0; } - INIT_WORK(&ew->work, fn, data); + INIT_WORK(&ew->work, fn); schedule_work(&ew->work); return 1; diff --git a/mm/slab.c b/mm/slab.c index a65bc5e992c3..5de81473df34 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -313,7 +313,7 @@ static int drain_freelist(struct kmem_cache *cache, static void free_block(struct kmem_cache *cachep, void **objpp, int len, int node); static int enable_cpucache(struct kmem_cache *cachep); -static void cache_reap(void *unused); +static void cache_reap(struct work_struct *unused); /* * This function must be completely optimized away if a constant is passed to @@ -925,7 +925,7 @@ static void __devinit start_cpu_timer(int cpu) */ if (keventd_up() && reap_work->work.func == NULL) { init_reap_node(cpu); - INIT_DELAYED_WORK(reap_work, cache_reap, NULL); + INIT_DELAYED_WORK(reap_work, cache_reap); schedule_delayed_work_on(cpu, reap_work, HZ + 3 * cpu); } } @@ -3815,7 +3815,7 @@ void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, * If we cannot acquire the cache chain mutex then just give up - we'll try * again on the next iteration. 
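The execute_in_process_context() change above pairs with the SCSI hunks earlier: the caller embeds a struct execute_work in its own object and the callback gets back to that object through the ew.work member. A rough sketch, with a hypothetical struct widget standing in for the scsi_device/scsi_target cases:

#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/kernel.h>

struct widget {
	int id;
	struct execute_work ew;		/* storage for the deferred call */
};

static void widget_release_usercontext(struct work_struct *work)
{
	struct widget *w = container_of(work, struct widget, ew.work);

	/* guaranteed process context here, so sleeping is allowed */
	printk(KERN_DEBUG "releasing widget %d\n", w->id);
	kfree(w);
}

static void widget_release(struct widget *w)
{
	/* runs the function directly unless we are in interrupt context */
	execute_in_process_context(widget_release_usercontext, &w->ew);
}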
*/ -static void cache_reap(void *unused) +static void cache_reap(struct work_struct *unused) { struct kmem_cache *searchp; struct kmem_list3 *l3; diff --git a/net/core/link_watch.c b/net/core/link_watch.c index f2ed09e25dfd..549a2ce951b0 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -34,8 +34,8 @@ enum lw_bits { static unsigned long linkwatch_flags; static unsigned long linkwatch_nextevent; -static void linkwatch_event(void *dummy); -static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event, NULL); +static void linkwatch_event(struct work_struct *dummy); +static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event); static LIST_HEAD(lweventlist); static DEFINE_SPINLOCK(lweventlist_lock); @@ -127,7 +127,7 @@ void linkwatch_run_queue(void) } -static void linkwatch_event(void *dummy) +static void linkwatch_event(struct work_struct *dummy) { /* Limit the number of linkwatch events to one * per second so that a runaway driver does not diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index cdd805344c61..8c74f9168b7d 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -197,9 +197,10 @@ EXPORT_SYMBOL_GPL(inet_twdr_hangman); extern void twkill_slots_invalid(void); -void inet_twdr_twkill_work(void *data) +void inet_twdr_twkill_work(struct work_struct *work) { - struct inet_timewait_death_row *twdr = data; + struct inet_timewait_death_row *twdr = + container_of(work, struct inet_timewait_death_row, twkill_work); int i; if ((INET_TWDR_TWKILL_SLOTS - 1) > (sizeof(twdr->thread_slots) * 8)) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 0163d9826907..af7b2c986b1f 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -45,8 +45,7 @@ struct inet_timewait_death_row tcp_death_row = { .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0, (unsigned long)&tcp_death_row), .twkill_work = __WORK_INITIALIZER(tcp_death_row.twkill_work, - inet_twdr_twkill_work, - &tcp_death_row), + inet_twdr_twkill_work), /* Short-time timewait calendar */ .twcal_hand = -1, diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index d5725cb1491e..d96fd466a9a4 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -284,8 +284,8 @@ static struct file_operations cache_file_operations; static struct file_operations content_file_operations; static struct file_operations cache_flush_operations; -static void do_cache_clean(void *data); -static DECLARE_DELAYED_WORK(cache_cleaner, do_cache_clean, NULL); +static void do_cache_clean(struct work_struct *work); +static DECLARE_DELAYED_WORK(cache_cleaner, do_cache_clean); void cache_register(struct cache_detail *cd) { @@ -461,7 +461,7 @@ static int cache_clean(void) /* * We want to regularly clean the cache, so we need to schedule some work ... 
*/ -static void do_cache_clean(void *data) +static void do_cache_clean(struct work_struct *work) { int delay = 5; if (cache_clean() == -1) diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 97be3f7fed44..49dba5febbbd 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -54,10 +54,11 @@ static void rpc_purge_list(struct rpc_inode *rpci, struct list_head *head, } static void -rpc_timeout_upcall_queue(void *data) +rpc_timeout_upcall_queue(struct work_struct *work) { LIST_HEAD(free_list); - struct rpc_inode *rpci = (struct rpc_inode *)data; + struct rpc_inode *rpci = + container_of(work, struct rpc_inode, queue_timeout.work); struct inode *inode = &rpci->vfs_inode; void (*destroy_msg)(struct rpc_pipe_msg *); @@ -838,7 +839,7 @@ init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) rpci->pipelen = 0; init_waitqueue_head(&rpci->waitq); INIT_DELAYED_WORK(&rpci->queue_timeout, - rpc_timeout_upcall_queue, rpci); + rpc_timeout_upcall_queue); rpci->ops = NULL; } } diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index a1ab4eed41f4..eff44bcdc95a 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -41,7 +41,7 @@ static mempool_t *rpc_buffer_mempool __read_mostly; static void __rpc_default_timer(struct rpc_task *task); static void rpciod_killall(void); -static void rpc_async_schedule(void *); +static void rpc_async_schedule(struct work_struct *); /* * RPC tasks sit here while waiting for conditions to improve. @@ -305,7 +305,7 @@ static void rpc_make_runnable(struct rpc_task *task) if (RPC_IS_ASYNC(task)) { int status; - INIT_WORK(&task->u.tk_work, rpc_async_schedule, (void *)task); + INIT_WORK(&task->u.tk_work, rpc_async_schedule); status = queue_work(task->tk_workqueue, &task->u.tk_work); if (status < 0) { printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status); @@ -695,9 +695,9 @@ rpc_execute(struct rpc_task *task) return __rpc_execute(task); } -static void rpc_async_schedule(void *arg) +static void rpc_async_schedule(struct work_struct *work) { - __rpc_execute((struct rpc_task *)arg); + __rpc_execute(container_of(work, struct rpc_task, u.tk_work)); } /** diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 80857470dc11..4f9a5d9791fb 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -479,9 +479,10 @@ int xprt_adjust_timeout(struct rpc_rqst *req) return status; } -static void xprt_autoclose(void *args) +static void xprt_autoclose(struct work_struct *work) { - struct rpc_xprt *xprt = (struct rpc_xprt *)args; + struct rpc_xprt *xprt = + container_of(work, struct rpc_xprt, task_cleanup); xprt_disconnect(xprt); xprt->ops->close(xprt); @@ -932,7 +933,7 @@ struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t si INIT_LIST_HEAD(&xprt->free); INIT_LIST_HEAD(&xprt->recv); - INIT_WORK(&xprt->task_cleanup, xprt_autoclose, xprt); + INIT_WORK(&xprt->task_cleanup, xprt_autoclose); init_timer(&xprt->timer); xprt->timer.function = xprt_init_autodisconnect; xprt->timer.data = (unsigned long) xprt; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 3c7532cd009e..cfe3c15be948 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1060,13 +1060,14 @@ static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock) /** * xs_udp_connect_worker - set up a UDP socket - * @args: RPC transport to connect + * @work: RPC transport to connect * * Invoked by a work queue tasklet. 
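Delayed items such as queue_timeout, connect_worker and cl_renewd in the hunks around here add one wrinkle: the handler is passed the embedded work member of the delayed_work, so the container_of() has to go through the .work field. A self-rearming sketch in the spirit of do_cache_clean() above (the structure and the 30-second period are illustrative):

#include <linux/workqueue.h>
#include <linux/jiffies.h>

struct scrubber {
	unsigned long passes;
	struct delayed_work dwork;
};

static struct scrubber my_scrubber;

static void scrub_worker(struct work_struct *work)
{
	struct scrubber *s = container_of(work, struct scrubber, dwork.work);

	s->passes++;

	/* rearm; stop it later with cancel_rearming_delayed_work(&s->dwork) */
	schedule_delayed_work(&s->dwork, 30 * HZ);
}

static void scrubber_start(void)
{
	INIT_DELAYED_WORK(&my_scrubber.dwork, scrub_worker);
	schedule_delayed_work(&my_scrubber.dwork, 30 * HZ);
}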
*/ -static void xs_udp_connect_worker(void *args) +static void xs_udp_connect_worker(struct work_struct *work) { - struct rpc_xprt *xprt = (struct rpc_xprt *) args; + struct rpc_xprt *xprt = + container_of(work, struct rpc_xprt, connect_worker.work); struct socket *sock = xprt->sock; int err, status = -EIO; @@ -1144,13 +1145,14 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt) /** * xs_tcp_connect_worker - connect a TCP socket to a remote endpoint - * @args: RPC transport to connect + * @work: RPC transport to connect * * Invoked by a work queue tasklet. */ -static void xs_tcp_connect_worker(void *args) +static void xs_tcp_connect_worker(struct work_struct *work) { - struct rpc_xprt *xprt = (struct rpc_xprt *)args; + struct rpc_xprt *xprt = + container_of(work, struct rpc_xprt, connect_worker.work); struct socket *sock = xprt->sock; int err, status = -EIO; @@ -1375,7 +1377,7 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) /* XXX: header size can vary due to auth type, IPv6, etc. */ xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); - INIT_DELAYED_WORK(&xprt->connect_worker, xs_udp_connect_worker, xprt); + INIT_DELAYED_WORK(&xprt->connect_worker, xs_udp_connect_worker); xprt->bind_timeout = XS_BIND_TO; xprt->connect_timeout = XS_UDP_CONN_TO; xprt->reestablish_timeout = XS_UDP_REEST_TO; @@ -1420,7 +1422,7 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; - INIT_DELAYED_WORK(&xprt->connect_worker, xs_tcp_connect_worker, xprt); + INIT_DELAYED_WORK(&xprt->connect_worker, xs_tcp_connect_worker); xprt->bind_timeout = XS_BIND_TO; xprt->connect_timeout = XS_TCP_CONN_TO; xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; diff --git a/security/keys/key.c b/security/keys/key.c index 80de8c3e9cc3..70eacbe5abde 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -30,8 +30,8 @@ DEFINE_SPINLOCK(key_user_lock); static LIST_HEAD(key_types_list); static DECLARE_RWSEM(key_types_sem); -static void key_cleanup(void *data); -static DECLARE_WORK(key_cleanup_task, key_cleanup, NULL); +static void key_cleanup(struct work_struct *work); +static DECLARE_WORK(key_cleanup_task, key_cleanup); /* we serialise key instantiation and link */ DECLARE_RWSEM(key_construction_sem); @@ -552,7 +552,7 @@ EXPORT_SYMBOL(key_negate_and_link); * do cleaning up in process context so that we don't have to disable * interrupts all over the place */ -static void key_cleanup(void *data) +static void key_cleanup(struct work_struct *work) { struct rb_node *_n; struct key *key; -- cgit v1.2.3 From 341a595850dac1b0503df34260257d71b4fdf72c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 6 Dec 2006 20:34:49 -0800 Subject: [PATCH] Support for freezeable workqueues Make it possible to create a workqueue the worker thread of which will be frozen during suspend, along with other kernel threads. Signed-off-by: Rafael J. 
Wysocki Acked-by: Pavel Machek Cc: Nigel Cunningham Cc: David Chinner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/workqueue.h | 8 +++++--- kernel/workqueue.c | 20 ++++++++++++++------ 2 files changed, 19 insertions(+), 9 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 4a3ea83c6d16..f0cb1df7b475 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -147,9 +147,11 @@ struct execute_work { extern struct workqueue_struct *__create_workqueue(const char *name, - int singlethread); -#define create_workqueue(name) __create_workqueue((name), 0) -#define create_singlethread_workqueue(name) __create_workqueue((name), 1) + int singlethread, + int freezeable); +#define create_workqueue(name) __create_workqueue((name), 0, 0) +#define create_freezeable_workqueue(name) __create_workqueue((name), 0, 1) +#define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0) extern void destroy_workqueue(struct workqueue_struct *wq); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 8d1e7cb8a51a..2945b094d871 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -29,6 +29,7 @@ #include #include #include +#include /* * The per-CPU workqueue (if single thread, we always use the first @@ -55,6 +56,8 @@ struct cpu_workqueue_struct { struct task_struct *thread; int run_depth; /* Detect run_workqueue() recursion depth */ + + int freezeable; /* Freeze the thread during suspend */ } ____cacheline_aligned; /* @@ -265,7 +268,8 @@ static int worker_thread(void *__cwq) struct k_sigaction sa; sigset_t blocked; - current->flags |= PF_NOFREEZE; + if (!cwq->freezeable) + current->flags |= PF_NOFREEZE; set_user_nice(current, -5); @@ -288,6 +292,9 @@ static int worker_thread(void *__cwq) set_current_state(TASK_INTERRUPTIBLE); while (!kthread_should_stop()) { + if (cwq->freezeable) + try_to_freeze(); + add_wait_queue(&cwq->more_work, &wait); if (list_empty(&cwq->worklist)) schedule(); @@ -364,7 +371,7 @@ void fastcall flush_workqueue(struct workqueue_struct *wq) EXPORT_SYMBOL_GPL(flush_workqueue); static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq, - int cpu) + int cpu, int freezeable) { struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu); struct task_struct *p; @@ -374,6 +381,7 @@ static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq, cwq->thread = NULL; cwq->insert_sequence = 0; cwq->remove_sequence = 0; + cwq->freezeable = freezeable; INIT_LIST_HEAD(&cwq->worklist); init_waitqueue_head(&cwq->more_work); init_waitqueue_head(&cwq->work_done); @@ -389,7 +397,7 @@ static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq, } struct workqueue_struct *__create_workqueue(const char *name, - int singlethread) + int singlethread, int freezeable) { int cpu, destroy = 0; struct workqueue_struct *wq; @@ -409,7 +417,7 @@ struct workqueue_struct *__create_workqueue(const char *name, mutex_lock(&workqueue_mutex); if (singlethread) { INIT_LIST_HEAD(&wq->list); - p = create_workqueue_thread(wq, singlethread_cpu); + p = create_workqueue_thread(wq, singlethread_cpu, freezeable); if (!p) destroy = 1; else @@ -417,7 +425,7 @@ struct workqueue_struct *__create_workqueue(const char *name, } else { list_add(&wq->list, &workqueues); for_each_online_cpu(cpu) { - p = create_workqueue_thread(wq, cpu); + p = create_workqueue_thread(wq, cpu, freezeable); if (p) { kthread_bind(p, cpu); wake_up_process(p); @@ -667,7 
+675,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, mutex_lock(&workqueue_mutex); /* Create a new workqueue thread for it. */ list_for_each_entry(wq, &workqueues, list) { - if (!create_workqueue_thread(wq, hotcpu)) { + if (!create_workqueue_thread(wq, hotcpu, 0)) { printk("workqueue for %i failed\n", hotcpu); return NOTIFY_BAD; } -- cgit v1.2.3 From 68380b581383c028830f79ec2670f4a193854aa6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 7 Dec 2006 09:28:19 -0800 Subject: Add "run_scheduled_work()" workqueue function This allows workqueue users to run just their own pending work, rather than wait for the whole workqueue to finish running. This solves the deadlock with networking libphy that was due to other workqueue entries possibly needing a lock that was held by the routine that wanted to flush its own work. It's not wonderful: if you absolutely need to synchronize with the work function having been executed, any user strictly speaking should have its own completion tracking logic, since when we run things explicitly by hand, the generic workqueue layer can no longer help us synchronize. Also, this is strictly only usable for work that has been scheduled without any delayed timers. You can not mix the new interface with schedule_delayed_work(). But it's better than what we had currently. Acked-by: Maciej W. Rozycki Signed-off-by: Linus Torvalds --- drivers/net/phy/phy.c | 3 +- include/linux/workqueue.h | 1 + kernel/workqueue.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 75 insertions(+), 2 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 4044bb1ada86..e175f3910b18 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -587,8 +587,7 @@ int phy_stop_interrupts(struct phy_device *phydev) * Finish any pending work; we might have been scheduled * to be called from keventd ourselves, though. */ - if (!current_is_keventd()) - flush_scheduled_work(); + run_scheduled_work(&phydev->phy_queue); free_irq(phydev->irq, phydev); diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index f0cb1df7b475..edef8d50b26b 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -162,6 +162,7 @@ extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, extern void FASTCALL(flush_workqueue(struct workqueue_struct *wq)); extern int FASTCALL(schedule_work(struct work_struct *work)); +extern int FASTCALL(run_scheduled_work(struct work_struct *work)); extern int FASTCALL(schedule_delayed_work(struct delayed_work *work, unsigned long delay)); extern int schedule_delayed_work_on(int cpu, struct delayed_work *work, unsigned long delay); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index c5257316f4b9..6b186750e9be 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -108,6 +108,79 @@ static inline void *get_wq_data(struct work_struct *work) return (void *) (work->management & WORK_STRUCT_WQ_DATA_MASK); } +static int __run_work(struct cpu_workqueue_struct *cwq, struct work_struct *work) +{ + int ret = 0; + unsigned long flags; + + spin_lock_irqsave(&cwq->lock, flags); + /* + * We need to re-validate the work info after we've gotten + * the cpu_workqueue lock. 
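The phy.c hunk above shows the intended use of run_scheduled_work(): a driver that may itself be running from keventd can execute just its own pending item instead of calling flush_scheduled_work() and waiting on (or deadlocking against) unrelated entries. A minimal sketch, with struct mydev and its reset item purely illustrative:

#include <linux/workqueue.h>

struct mydev {
	int irq;
	struct work_struct reset_work;	/* queued with schedule_work() */
};

static void mydev_teardown(struct mydev *dev)
{
	/*
	 * Run our own pending reset synchronously if it is still queued.
	 * Note this only works for plain work items, never for anything
	 * scheduled through schedule_delayed_work().
	 */
	run_scheduled_work(&dev->reset_work);

	/* now safe to release resources the reset handler might have used */
}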
We can run the work now iff: + * + * - the wq_data still matches the cpu_workqueue_struct + * - AND the work is still marked pending + * - AND the work is still on a list (which will be this + * workqueue_struct list) + * + * All these conditions are important, because we + * need to protect against the work being run right + * now on another CPU (all but the last one might be + * true if it's currently running and has not been + * released yet, for example). + */ + if (get_wq_data(work) == cwq + && work_pending(work) + && !list_empty(&work->entry)) { + work_func_t f = work->func; + list_del_init(&work->entry); + spin_unlock_irqrestore(&cwq->lock, flags); + + if (!test_bit(WORK_STRUCT_NOAUTOREL, &work->management)) + work_release(work); + f(work); + + spin_lock_irqsave(&cwq->lock, flags); + cwq->remove_sequence++; + wake_up(&cwq->work_done); + ret = 1; + } + spin_unlock_irqrestore(&cwq->lock, flags); + return ret; +} + +/** + * run_scheduled_work - run scheduled work synchronously + * @work: work to run + * + * This checks if the work was pending, and runs it + * synchronously if so. It returns a boolean to indicate + * whether it had any scheduled work to run or not. + * + * NOTE! This _only_ works for normal work_structs. You + * CANNOT use this for delayed work, because the wq data + * for delayed work will not point properly to the per- + * CPU workqueue struct, but will change! + */ +int fastcall run_scheduled_work(struct work_struct *work) +{ + for (;;) { + struct cpu_workqueue_struct *cwq; + + if (!work_pending(work)) + return 0; + if (list_empty(&work->entry)) + return 0; + /* NOTE! This depends intimately on __queue_work! */ + cwq = get_wq_data(work); + if (!cwq) + return 0; + if (__run_work(cwq, work)) + return 1; + } +} +EXPORT_SYMBOL(run_scheduled_work); + /* Preempt must be disabled. */ static void __queue_work(struct cpu_workqueue_struct *cwq, struct work_struct *work) -- cgit v1.2.3 From 0221872a3b0aa2fa2f3fa60affcbaebd662c4a90 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 15 Dec 2006 14:13:51 -0800 Subject: Fix "delayed_work_pending()" macro expansion Nobody uses it, but it was still wrong. Using the macro argument name 'work' meant that when we used 'work' as a member name, that would also get replaced by the macro argument. Signed-off-by: Linus Torvalds --- include/linux/workqueue.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index edef8d50b26b..5b13dcf02714 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -121,8 +121,8 @@ struct execute_work { * pending * @work: The work item in question */ -#define delayed_work_pending(work) \ - test_bit(WORK_STRUCT_PENDING, &(work)->work.management) +#define delayed_work_pending(w) \ + work_pending(&(w)->work) /** * work_release - Release a work item under execution -- cgit v1.2.3 From a08727bae727fc2ca3a6ee9506d77786b71070b3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 16 Dec 2006 09:53:50 -0800 Subject: Make workqueue bit operations work on "atomic_long_t" On architectures where the atomicity of the bit operations is handled by external means (ie a separate spinlock to protect concurrent accesses), just doing a direct assignment on the workqueue data field (as done by commit 4594bf159f1962cec3b727954b7c598b07e2e737) can cause the assignment to be lost due to lack of serialization with the bitops on the same word. 
So we need to serialize the assignment with the locks on those architectures (notably older ARM chips, PA-RISC and sparc32). So rather than using an "unsigned long", let's use "atomic_long_t", which already has a safe assignment operation (atomic_long_set()) on such architectures. This requires that the atomic operations use the same atomicity locks as the bit operations do, but that is largely the case anyway. Sparc32 will probably need fixing. Architectures (including modern ARM with LL/SC) that implement sane atomic operations for SMP won't see any of this matter. Cc: Russell King Cc: David Howells Cc: David Miller Cc: Matthew Wilcox Cc: Linux Arch Maintainers Cc: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/workqueue.h | 32 ++++++++++++++++++++++---------- kernel/workqueue.c | 16 ++++++++-------- 2 files changed, 30 insertions(+), 18 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 5b13dcf02714..2a7b38d87018 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -8,16 +8,21 @@ #include #include #include +#include struct workqueue_struct; struct work_struct; typedef void (*work_func_t)(struct work_struct *work); +/* + * The first word is the work queue pointer and the flags rolled into + * one + */ +#define work_data_bits(work) ((unsigned long *)(&(work)->data)) + struct work_struct { - /* the first word is the work queue pointer and the flags rolled into - * one */ - unsigned long management; + atomic_long_t data; #define WORK_STRUCT_PENDING 0 /* T if work item pending execution */ #define WORK_STRUCT_NOAUTOREL 1 /* F if work item automatically released on exec */ #define WORK_STRUCT_FLAG_MASK (3UL) @@ -26,6 +31,9 @@ struct work_struct { work_func_t func; }; +#define WORK_DATA_INIT(autorelease) \ + ATOMIC_LONG_INIT((autorelease) << WORK_STRUCT_NOAUTOREL) + struct delayed_work { struct work_struct work; struct timer_list timer; @@ -36,13 +44,13 @@ struct execute_work { }; #define __WORK_INITIALIZER(n, f) { \ - .management = 0, \ + .data = WORK_DATA_INIT(0), \ .entry = { &(n).entry, &(n).entry }, \ .func = (f), \ } #define __WORK_INITIALIZER_NAR(n, f) { \ - .management = (1 << WORK_STRUCT_NOAUTOREL), \ + .data = WORK_DATA_INIT(1), \ .entry = { &(n).entry, &(n).entry }, \ .func = (f), \ } @@ -82,17 +90,21 @@ struct execute_work { /* * initialize all of a work item in one go + * + * NOTE! No point in using "atomic_long_set()": useing a direct + * assignment of the work data initializer allows the compiler + * to generate better code. */ #define INIT_WORK(_work, _func) \ do { \ - (_work)->management = 0; \ + (_work)->data = (atomic_long_t) WORK_DATA_INIT(0); \ INIT_LIST_HEAD(&(_work)->entry); \ PREPARE_WORK((_work), (_func)); \ } while (0) #define INIT_WORK_NAR(_work, _func) \ do { \ - (_work)->management = (1 << WORK_STRUCT_NOAUTOREL); \ + (_work)->data = (atomic_long_t) WORK_DATA_INIT(1); \ INIT_LIST_HEAD(&(_work)->entry); \ PREPARE_WORK((_work), (_func)); \ } while (0) @@ -114,7 +126,7 @@ struct execute_work { * @work: The work item in question */ #define work_pending(work) \ - test_bit(WORK_STRUCT_PENDING, &(work)->management) + test_bit(WORK_STRUCT_PENDING, work_data_bits(work)) /** * delayed_work_pending - Find out whether a delayable work item is currently @@ -143,7 +155,7 @@ struct execute_work { * This should also be used to release a delayed work item. 
*/ #define work_release(work) \ - clear_bit(WORK_STRUCT_PENDING, &(work)->management) + clear_bit(WORK_STRUCT_PENDING, work_data_bits(work)) extern struct workqueue_struct *__create_workqueue(const char *name, @@ -188,7 +200,7 @@ static inline int cancel_delayed_work(struct delayed_work *work) ret = del_timer_sync(&work->timer); if (ret) - clear_bit(WORK_STRUCT_PENDING, &work->work.management); + work_release(&work->work); return ret; } diff --git a/kernel/workqueue.c b/kernel/workqueue.c index db49886bfae1..742cbbe49bdc 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -96,13 +96,13 @@ static inline void set_wq_data(struct work_struct *work, void *wq) BUG_ON(!work_pending(work)); new = (unsigned long) wq | (1UL << WORK_STRUCT_PENDING); - new |= work->management & WORK_STRUCT_FLAG_MASK; - work->management = new; + new |= WORK_STRUCT_FLAG_MASK & *work_data_bits(work); + atomic_long_set(&work->data, new); } static inline void *get_wq_data(struct work_struct *work) { - return (void *) (work->management & WORK_STRUCT_WQ_DATA_MASK); + return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK); } static int __run_work(struct cpu_workqueue_struct *cwq, struct work_struct *work) @@ -133,7 +133,7 @@ static int __run_work(struct cpu_workqueue_struct *cwq, struct work_struct *work list_del_init(&work->entry); spin_unlock_irqrestore(&cwq->lock, flags); - if (!test_bit(WORK_STRUCT_NOAUTOREL, &work->management)) + if (!test_bit(WORK_STRUCT_NOAUTOREL, work_data_bits(work))) work_release(work); f(work); @@ -206,7 +206,7 @@ int fastcall queue_work(struct workqueue_struct *wq, struct work_struct *work) { int ret = 0, cpu = get_cpu(); - if (!test_and_set_bit(WORK_STRUCT_PENDING, &work->management)) { + if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) { if (unlikely(is_single_threaded(wq))) cpu = singlethread_cpu; BUG_ON(!list_empty(&work->entry)); @@ -248,7 +248,7 @@ int fastcall queue_delayed_work(struct workqueue_struct *wq, if (delay == 0) return queue_work(wq, work); - if (!test_and_set_bit(WORK_STRUCT_PENDING, &work->management)) { + if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) { BUG_ON(timer_pending(timer)); BUG_ON(!list_empty(&work->entry)); @@ -280,7 +280,7 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, struct timer_list *timer = &dwork->timer; struct work_struct *work = &dwork->work; - if (!test_and_set_bit(WORK_STRUCT_PENDING, &work->management)) { + if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) { BUG_ON(timer_pending(timer)); BUG_ON(!list_empty(&work->entry)); @@ -321,7 +321,7 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq) spin_unlock_irqrestore(&cwq->lock, flags); BUG_ON(get_wq_data(work) != cwq); - if (!test_bit(WORK_STRUCT_NOAUTOREL, &work->management)) + if (!test_bit(WORK_STRUCT_NOAUTOREL, work_data_bits(work))) work_release(work); f(work); -- cgit v1.2.3 From 071b638689464c6b39407025eedd810d5b5e6f5d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 26 Apr 2007 15:45:32 -0700 Subject: [WORKQUEUE]: cancel_delayed_work: use del_timer() instead of del_timer_sync() del_timer_sync() buys nothing for cancel_delayed_work(), but it is less efficient since it locks the timer unconditionally, and may wait for the completion of the delayed_work_timer_fn(). cancel_delayed_work() == 0 means: before this patch: work->func may still be running or queued after this patch: work->func may still be running or queued, or delayed_work_timer_fn->__queue_work() in progress. 
The latter doesn't differ from the caller's POV, delayed_work_timer_fn() is called with _PENDING bit set. cancel_delayed_work() == 1 with this patch adds a new possibility: delayed_work->work was cancelled, but delayed_work_timer_fn is still running (this is only possible for the re-arming works on single-threaded workqueue). In this case the timer was re-started by work->func(), nobody else can do this. This in turn means that delayed_work_timer_fn has already passed __queue_work() (and won't touch delayed_work) because nobody else can queue delayed_work->work. Signed-off-by: Oleg Nesterov Signed-Off-By: David Howells Signed-off-by: David S. Miller --- include/linux/workqueue.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 2a7b38d87018..b8abfc74d038 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -191,14 +191,15 @@ int execute_in_process_context(work_func_t fn, struct execute_work *); /* * Kill off a pending schedule_delayed_work(). Note that the work callback - * function may still be running on return from cancel_delayed_work(). Run - * flush_scheduled_work() to wait on it. + * function may still be running on return from cancel_delayed_work(), unless + * it returns 1 and the work doesn't re-arm itself. Run flush_workqueue() or + * cancel_work_sync() to wait on it. */ static inline int cancel_delayed_work(struct delayed_work *work) { int ret; - ret = del_timer_sync(&work->timer); + ret = del_timer(&work->timer); if (ret) work_release(&work->work); return ret; -- cgit v1.2.3 From 28287033e12463c8ff89f1ea8038783d0360391c Mon Sep 17 00:00:00 2001 From: Venki Pallipadi Date: Tue, 8 May 2007 00:27:47 -0700 Subject: Add a new deferrable delayed work init Add a new deferrable delayed work init. This can be used to schedule work that is 'unimportant' when the CPU is idle and can be called later, when the CPU eventually comes out of idle. Use this init in the cpufreq ondemand governor.
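As an illustration (not part of the patch), a minimal sketch of how a driver might use the new initializer; the poll_* names below are hypothetical:

/* Deferrable poll: an idle CPU is not woken just to run this work,
 * so it may fire later than the requested delay. */
static struct delayed_work poll_work;

static void poll_fn(struct work_struct *work)
{
	/* sample the hardware, then re-arm the poll */
	schedule_delayed_work(&poll_work, HZ);
}

static void poll_start(void)
{
	INIT_DELAYED_WORK_DEFERRABLE(&poll_work, poll_fn);
	schedule_delayed_work(&poll_work, HZ);
}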
Signed-off-by: Venkatesh Pallipadi Cc: Dave Jones Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/cpufreq/cpufreq_ondemand.c | 2 +- include/linux/workqueue.h | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux/workqueue.h') diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 8d053f500fc2..8532bb79e5fc 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -470,7 +470,7 @@ static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) dbs_info->enable = 1; ondemand_powersave_bias_init(); dbs_info->sample_type = DBS_NORMAL_SAMPLE; - INIT_DELAYED_WORK(&dbs_info->work, do_dbs_timer); + INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer); queue_delayed_work_on(dbs_info->cpu, kondemand_wq, &dbs_info->work, delay); } diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index b8abfc74d038..f16ba1e0687d 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -121,6 +121,12 @@ struct execute_work { init_timer(&(_work)->timer); \ } while (0) +#define INIT_DELAYED_WORK_DEFERRABLE(_work, _func) \ + do { \ + INIT_WORK(&(_work)->work, (_func)); \ + init_timer_deferrable(&(_work)->timer); \ + } while (0) + /** * work_pending - Find out whether a work item is currently pending * @work: The work item in question -- cgit v1.2.3 From b89deed32ccc96098bd6bc953c64bba6b847774f Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 9 May 2007 02:33:52 -0700 Subject: implement flush_work() A basic problem with flush_scheduled_work() is that it blocks behind _all_ presently-queued works, rather than just the work which the caller wants to flush. If the caller holds some lock, and if one of the queued works happens to want that lock as well, then accidental deadlocks can occur. One example of this is the phy layer: it wants to flush work while holding rtnl_lock(). But if a linkwatch event happens to be queued, the phy code will deadlock because the linkwatch callback function takes rtnl_lock. So we implement a new function which will flush a *single* work - just the one which the caller wants to free up. Thus we avoid the accidental deadlocks which can arise from unrelated subsystems' callbacks taking shared locks. flush_work() non-blockingly dequeues the work_struct which we want to kill, then it waits for its handler to complete on all CPUs. Add ->current_work to "struct cpu_workqueue_struct"; it points to the currently running "struct work_struct". When flush_work(work) detects ->current_work == work, it inserts a barrier at the _head_ of ->worklist (and thus right _after_ that work) and waits for completion. This means that the next work fired on that CPU will be this barrier, or another barrier queued by concurrent flush_work(), so the caller of flush_work() will be woken before any "regular" work has a chance to run. When wait_on_work() unlocks workqueue_mutex (or whatever we choose to protect against CPU hotplug), the CPU may go away. But in that case take_over_work() will move a barrier we queued to another CPU, it will be fired sometime, and wait_on_work() will be woken. Actually, we are doing cleanup_workqueue_thread()->kthread_stop() before take_over_work(), so cwq->thread should complete its ->worklist (and thus the barrier), because currently we don't check kthread_should_stop() in run_workqueue(). But even if we did, everything should be ok.
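The deadlock that flush_work() avoids can be sketched as follows; this is an illustration only, with hypothetical names, mirroring the phy/linkwatch case described above:

static DEFINE_MUTEX(big_lock);		/* plays the role of rtnl_lock */
static struct workqueue_struct *my_wq;
static struct work_struct my_work;	/* never takes big_lock */
static struct work_struct other_work;	/* unrelated work on my_wq, takes big_lock */

static void other_work_fn(struct work_struct *work)
{
	mutex_lock(&big_lock);
	/* ... */
	mutex_unlock(&big_lock);
}

static void my_teardown(void)
{
	mutex_lock(&big_lock);
	/*
	 * flush_workqueue(my_wq) here can deadlock: it would also wait
	 * for other_work, which is blocked on big_lock.  flush_work()
	 * waits only for my_work itself.
	 */
	flush_work(my_wq, &my_work);
	mutex_unlock(&big_lock);
}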
[akpm@osdl.org: cleanup] [akpm@osdl.org: add flush_work_keventd() wrapper] Signed-off-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/workqueue.h | 4 +- kernel/workqueue.c | 95 +++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 95 insertions(+), 4 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index f16ba1e0687d..26a70992dec8 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -178,6 +178,8 @@ extern int FASTCALL(queue_delayed_work(struct workqueue_struct *wq, struct delay extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, struct delayed_work *work, unsigned long delay); extern void FASTCALL(flush_workqueue(struct workqueue_struct *wq)); +extern void flush_work(struct workqueue_struct *wq, struct work_struct *work); +extern void flush_work_keventd(struct work_struct *work); extern int FASTCALL(schedule_work(struct work_struct *work)); extern int FASTCALL(run_scheduled_work(struct work_struct *work)); @@ -199,7 +201,7 @@ int execute_in_process_context(work_func_t fn, struct execute_work *); * Kill off a pending schedule_delayed_work(). Note that the work callback * function may still be running on return from cancel_delayed_work(), unless * it returns 1 and the work doesn't re-arm itself. Run flush_workqueue() or - * cancel_work_sync() to wait on it. + * flush_work() or cancel_work_sync() to wait on it. */ static inline int cancel_delayed_work(struct delayed_work *work) { diff --git a/kernel/workqueue.c b/kernel/workqueue.c index b7bb37ab03bc..918d55267a12 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -46,6 +46,7 @@ struct cpu_workqueue_struct { struct workqueue_struct *wq; struct task_struct *thread; + struct work_struct *current_work; int run_depth; /* Detect run_workqueue() recursion depth */ @@ -120,6 +121,7 @@ static int __run_work(struct cpu_workqueue_struct *cwq, struct work_struct *work && work_pending(work) && !list_empty(&work->entry)) { work_func_t f = work->func; + cwq->current_work = work; list_del_init(&work->entry); spin_unlock_irqrestore(&cwq->lock, flags); @@ -128,6 +130,7 @@ static int __run_work(struct cpu_workqueue_struct *cwq, struct work_struct *work f(work); spin_lock_irqsave(&cwq->lock, flags); + cwq->current_work = NULL; ret = 1; } spin_unlock_irqrestore(&cwq->lock, flags); @@ -166,6 +169,17 @@ int fastcall run_scheduled_work(struct work_struct *work) } EXPORT_SYMBOL(run_scheduled_work); +static void insert_work(struct cpu_workqueue_struct *cwq, + struct work_struct *work, int tail) +{ + set_wq_data(work, cwq); + if (tail) + list_add_tail(&work->entry, &cwq->worklist); + else + list_add(&work->entry, &cwq->worklist); + wake_up(&cwq->more_work); +} + /* Preempt must be disabled. 
*/ static void __queue_work(struct cpu_workqueue_struct *cwq, struct work_struct *work) @@ -173,9 +187,7 @@ static void __queue_work(struct cpu_workqueue_struct *cwq, unsigned long flags; spin_lock_irqsave(&cwq->lock, flags); - set_wq_data(work, cwq); - list_add_tail(&work->entry, &cwq->worklist); - wake_up(&cwq->more_work); + insert_work(cwq, work, 1); spin_unlock_irqrestore(&cwq->lock, flags); } @@ -305,6 +317,7 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq) struct work_struct, entry); work_func_t f = work->func; + cwq->current_work = work; list_del_init(cwq->worklist.next); spin_unlock_irqrestore(&cwq->lock, flags); @@ -325,6 +338,7 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq) } spin_lock_irqsave(&cwq->lock, flags); + cwq->current_work = NULL; } cwq->run_depth--; spin_unlock_irqrestore(&cwq->lock, flags); @@ -449,6 +463,75 @@ void fastcall flush_workqueue(struct workqueue_struct *wq) } EXPORT_SYMBOL_GPL(flush_workqueue); +static void wait_on_work(struct cpu_workqueue_struct *cwq, + struct work_struct *work) +{ + struct wq_barrier barr; + int running = 0; + + spin_lock_irq(&cwq->lock); + if (unlikely(cwq->current_work == work)) { + init_wq_barrier(&barr); + insert_work(cwq, &barr.work, 0); + running = 1; + } + spin_unlock_irq(&cwq->lock); + + if (unlikely(running)) { + mutex_unlock(&workqueue_mutex); + wait_for_completion(&barr.done); + mutex_lock(&workqueue_mutex); + } +} + +/** + * flush_work - block until a work_struct's callback has terminated + * @wq: the workqueue on which the work is queued + * @work: the work which is to be flushed + * + * flush_work() will attempt to cancel the work if it is queued. If the work's + * callback appears to be running, flush_work() will block until it has + * completed. + * + * flush_work() is designed to be used when the caller is tearing down data + * structures which the callback function operates upon. It is expected that, + * prior to calling flush_work(), the caller has arranged for the work to not + * be requeued. + */ +void flush_work(struct workqueue_struct *wq, struct work_struct *work) +{ + struct cpu_workqueue_struct *cwq; + + mutex_lock(&workqueue_mutex); + cwq = get_wq_data(work); + /* Was it ever queued ? */ + if (!cwq) + goto out; + + /* + * This work can't be re-queued, and the lock above protects us + * from take_over_work(), no need to re-check that get_wq_data() + * is still the same when we take cwq->lock. + */ + spin_lock_irq(&cwq->lock); + list_del_init(&work->entry); + work_release(work); + spin_unlock_irq(&cwq->lock); + + if (is_single_threaded(wq)) { + /* Always use first cpu's area. */ + wait_on_work(per_cpu_ptr(wq->cpu_wq, singlethread_cpu), work); + } else { + int cpu; + + for_each_online_cpu(cpu) + wait_on_work(per_cpu_ptr(wq->cpu_wq, cpu), work); + } +out: + mutex_unlock(&workqueue_mutex); +} +EXPORT_SYMBOL_GPL(flush_work); + static struct task_struct *create_workqueue_thread(struct workqueue_struct *wq, int cpu, int freezeable) { @@ -650,6 +733,12 @@ void flush_scheduled_work(void) } EXPORT_SYMBOL(flush_scheduled_work); +void flush_work_keventd(struct work_struct *work) +{ + flush_work(keventd_wq, work); +} +EXPORT_SYMBOL(flush_work_keventd); + /** * cancel_rearming_delayed_workqueue - reliably kill off a delayed work whose handler rearms the delayed work. 
* @wq: the controlling workqueue structure -- cgit v1.2.3 From 7097a87afe937a5879528d52880c2d95f089e96c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 9 May 2007 02:34:10 -0700 Subject: workqueue: kill run_scheduled_work() Because it has no callers. Actually, I think the whole idea of run_scheduled_work() was not right, not good to mix "unqueue this work and execute its ->func()" in one function. Signed-off-by: Oleg Nesterov Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/workqueue.h | 1 - kernel/workqueue.c | 73 ----------------------------------------------- 2 files changed, 74 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 26a70992dec8..2a58f16e1961 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -182,7 +182,6 @@ extern void flush_work(struct workqueue_struct *wq, struct work_struct *work); extern void flush_work_keventd(struct work_struct *work); extern int FASTCALL(schedule_work(struct work_struct *work)); -extern int FASTCALL(run_scheduled_work(struct work_struct *work)); extern int FASTCALL(schedule_delayed_work(struct delayed_work *work, unsigned long delay)); extern int schedule_delayed_work_on(int cpu, struct delayed_work *work, unsigned long delay); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index a981add58fb9..ea422254f8bf 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -98,79 +98,6 @@ static inline void *get_wq_data(struct work_struct *work) return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK); } -static int __run_work(struct cpu_workqueue_struct *cwq, struct work_struct *work) -{ - int ret = 0; - unsigned long flags; - - spin_lock_irqsave(&cwq->lock, flags); - /* - * We need to re-validate the work info after we've gotten - * the cpu_workqueue lock. We can run the work now iff: - * - * - the wq_data still matches the cpu_workqueue_struct - * - AND the work is still marked pending - * - AND the work is still on a list (which will be this - * workqueue_struct list) - * - * All these conditions are important, because we - * need to protect against the work being run right - * now on another CPU (all but the last one might be - * true if it's currently running and has not been - * released yet, for example). - */ - if (get_wq_data(work) == cwq - && work_pending(work) - && !list_empty(&work->entry)) { - work_func_t f = work->func; - cwq->current_work = work; - list_del_init(&work->entry); - spin_unlock_irqrestore(&cwq->lock, flags); - - if (!test_bit(WORK_STRUCT_NOAUTOREL, work_data_bits(work))) - work_release(work); - f(work); - - spin_lock_irqsave(&cwq->lock, flags); - cwq->current_work = NULL; - ret = 1; - } - spin_unlock_irqrestore(&cwq->lock, flags); - return ret; -} - -/** - * run_scheduled_work - run scheduled work synchronously - * @work: work to run - * - * This checks if the work was pending, and runs it - * synchronously if so. It returns a boolean to indicate - * whether it had any scheduled work to run or not. - * - * NOTE! This _only_ works for normal work_structs. You - * CANNOT use this for delayed work, because the wq data - * for delayed work will not point properly to the per- - * CPU workqueue struct, but will change! - */ -int fastcall run_scheduled_work(struct work_struct *work) -{ - for (;;) { - struct cpu_workqueue_struct *cwq; - - if (!work_pending(work)) - return 0; - if (list_empty(&work->entry)) - return 0; - /* NOTE! 
This depends intimately on __queue_work! */ - cwq = get_wq_data(work); - if (!cwq) - return 0; - if (__run_work(cwq, work)) - return 1; - } -} -EXPORT_SYMBOL(run_scheduled_work); - static void insert_work(struct cpu_workqueue_struct *cwq, struct work_struct *work, int tail) { -- cgit v1.2.3 From 1634c48f8b85dcb05101f1eb2eab9af40b5976da Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 9 May 2007 02:34:18 -0700 Subject: make cancel_rearming_delayed_work() work on any workqueue, not just keventd_wq cancel_rearming_delayed_workqueue(wq, dwork) doesn't need the first parameter. We don't hang on un-queued dwork any longer, and work->data doesn't change its type. This means we can always figure out "wq" from dwork when it is needed. Remove this parameter, and rename the function to cancel_rearming_delayed_work(). Re-create an inline "obsolete" cancel_rearming_delayed_workqueue(wq) which just calls cancel_rearming_delayed_work(). Signed-off-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/workqueue.h | 13 ++++++++++--- kernel/workqueue.c | 27 +++++++++------------------ 2 files changed, 19 insertions(+), 21 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 2a58f16e1961..27110c04f21e 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -191,9 +191,6 @@ extern int current_is_keventd(void); extern int keventd_up(void); extern void init_workqueues(void); -void cancel_rearming_delayed_work(struct delayed_work *work); -void cancel_rearming_delayed_workqueue(struct workqueue_struct *, - struct delayed_work *); int execute_in_process_context(work_func_t fn, struct execute_work *); /* @@ -212,4 +209,14 @@ static inline int cancel_delayed_work(struct delayed_work *work) return ret; } +extern void cancel_rearming_delayed_work(struct delayed_work *work); + +/* Obsolete. use cancel_rearming_delayed_work() */ +static inline +void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, + struct delayed_work *work) +{ + cancel_rearming_delayed_work(work); +} + #endif diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 985902e2e071..41eaffd125ca 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -555,32 +555,23 @@ void flush_work_keventd(struct work_struct *work) EXPORT_SYMBOL(flush_work_keventd); /** - * cancel_rearming_delayed_workqueue - kill off a delayed work whose handler rearms the delayed work. - * @wq: the controlling workqueue structure + * cancel_rearming_delayed_work - kill off a delayed work whose handler rearms the delayed work. * @dwork: the delayed work struct * * Note that the work callback function may still be running on return from * cancel_delayed_work(). Run flush_workqueue() or flush_work() to wait on it. */ -void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, - struct delayed_work *dwork) +void cancel_rearming_delayed_work(struct delayed_work *dwork) { - /* Was it ever queued ? */ - if (!get_wq_data(&dwork->work)) - return; + struct cpu_workqueue_struct *cwq = get_wq_data(&dwork->work); - while (!cancel_delayed_work(dwork)) - flush_workqueue(wq); -} -EXPORT_SYMBOL(cancel_rearming_delayed_workqueue); + /* Was it ever queued ? */ + if (cwq != NULL) { + struct workqueue_struct *wq = cwq->wq; -/** - * cancel_rearming_delayed_work - kill off a delayed keventd work whose handler rearms the delayed work. 
- * @dwork: the delayed work struct - */ -void cancel_rearming_delayed_work(struct delayed_work *dwork) -{ - cancel_rearming_delayed_workqueue(keventd_wq, dwork); + while (!cancel_delayed_work(dwork)) + flush_workqueue(wq); + } } EXPORT_SYMBOL(cancel_rearming_delayed_work); -- cgit v1.2.3 From 23b2e5991afde5af91a1a661d7f47ee56120759e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 9 May 2007 02:34:19 -0700 Subject: workqueue: kill NOAUTOREL works We don't have any users, and it is not so trivial to use NOAUTOREL works correctly. It is better to simplify API. Delete NOAUTOREL support and rename work_release to work_clear_pending to avoid a confusion. Signed-off-by: Oleg Nesterov Acked-by: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/workqueue.h | 64 +++++++---------------------------------------- kernel/workqueue.c | 5 ++-- 2 files changed, 11 insertions(+), 58 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 27110c04f21e..e1581dce5890 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -24,15 +24,13 @@ typedef void (*work_func_t)(struct work_struct *work); struct work_struct { atomic_long_t data; #define WORK_STRUCT_PENDING 0 /* T if work item pending execution */ -#define WORK_STRUCT_NOAUTOREL 1 /* F if work item automatically released on exec */ #define WORK_STRUCT_FLAG_MASK (3UL) #define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK) struct list_head entry; work_func_t func; }; -#define WORK_DATA_INIT(autorelease) \ - ATOMIC_LONG_INIT((autorelease) << WORK_STRUCT_NOAUTOREL) +#define WORK_DATA_INIT() ATOMIC_LONG_INIT(0) struct delayed_work { struct work_struct work; @@ -44,14 +42,8 @@ struct execute_work { }; #define __WORK_INITIALIZER(n, f) { \ - .data = WORK_DATA_INIT(0), \ - .entry = { &(n).entry, &(n).entry }, \ - .func = (f), \ - } - -#define __WORK_INITIALIZER_NAR(n, f) { \ - .data = WORK_DATA_INIT(1), \ - .entry = { &(n).entry, &(n).entry }, \ + .data = WORK_DATA_INIT(), \ + .entry = { &(n).entry, &(n).entry }, \ .func = (f), \ } @@ -60,23 +52,12 @@ struct execute_work { .timer = TIMER_INITIALIZER(NULL, 0, 0), \ } -#define __DELAYED_WORK_INITIALIZER_NAR(n, f) { \ - .work = __WORK_INITIALIZER_NAR((n).work, (f)), \ - .timer = TIMER_INITIALIZER(NULL, 0, 0), \ - } - #define DECLARE_WORK(n, f) \ struct work_struct n = __WORK_INITIALIZER(n, f) -#define DECLARE_WORK_NAR(n, f) \ - struct work_struct n = __WORK_INITIALIZER_NAR(n, f) - #define DECLARE_DELAYED_WORK(n, f) \ struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f) -#define DECLARE_DELAYED_WORK_NAR(n, f) \ - struct dwork_struct n = __DELAYED_WORK_INITIALIZER_NAR(n, f) - /* * initialize a work item's function pointer */ @@ -95,16 +76,9 @@ struct execute_work { * assignment of the work data initializer allows the compiler * to generate better code. 
*/ -#define INIT_WORK(_work, _func) \ - do { \ - (_work)->data = (atomic_long_t) WORK_DATA_INIT(0); \ - INIT_LIST_HEAD(&(_work)->entry); \ - PREPARE_WORK((_work), (_func)); \ - } while (0) - -#define INIT_WORK_NAR(_work, _func) \ +#define INIT_WORK(_work, _func) \ do { \ - (_work)->data = (atomic_long_t) WORK_DATA_INIT(1); \ + (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \ INIT_LIST_HEAD(&(_work)->entry); \ PREPARE_WORK((_work), (_func)); \ } while (0) @@ -115,12 +89,6 @@ struct execute_work { init_timer(&(_work)->timer); \ } while (0) -#define INIT_DELAYED_WORK_NAR(_work, _func) \ - do { \ - INIT_WORK_NAR(&(_work)->work, (_func)); \ - init_timer(&(_work)->timer); \ - } while (0) - #define INIT_DELAYED_WORK_DEFERRABLE(_work, _func) \ do { \ INIT_WORK(&(_work)->work, (_func)); \ @@ -143,24 +111,10 @@ struct execute_work { work_pending(&(w)->work) /** - * work_release - Release a work item under execution - * @work: The work item to release - * - * This is used to release a work item that has been initialised with automatic - * release mode disabled (WORK_STRUCT_NOAUTOREL is set). This gives the work - * function the opportunity to grab auxiliary data from the container of the - * work_struct before clearing the pending bit as the work_struct may be - * subject to deallocation the moment the pending bit is cleared. - * - * In such a case, this should be called in the work function after it has - * fetched any data it may require from the containter of the work_struct. - * After this function has been called, the work_struct may be scheduled for - * further execution or it may be deallocated unless other precautions are - * taken. - * - * This should also be used to release a delayed work item. + * work_clear_pending - for internal use only, mark a work item as not pending + * @work: The work item in question */ -#define work_release(work) \ +#define work_clear_pending(work) \ clear_bit(WORK_STRUCT_PENDING, work_data_bits(work)) @@ -205,7 +159,7 @@ static inline int cancel_delayed_work(struct delayed_work *work) ret = del_timer(&work->timer); if (ret) - work_release(&work->work); + work_clear_pending(&work->work); return ret; } diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 41eaffd125ca..0611de815a8f 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -246,8 +246,7 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq) spin_unlock_irq(&cwq->lock); BUG_ON(get_wq_data(work) != cwq); - if (!test_bit(WORK_STRUCT_NOAUTOREL, work_data_bits(work))) - work_release(work); + work_clear_pending(work); f(work); if (unlikely(in_atomic() || lockdep_depth(current) > 0)) { @@ -453,7 +452,7 @@ void flush_work(struct workqueue_struct *wq, struct work_struct *work) */ spin_lock_irq(&cwq->lock); list_del_init(&work->entry); - work_release(work); + work_clear_pending(work); spin_unlock_irq(&cwq->lock); for_each_cpu_mask(cpu, *cpu_map) -- cgit v1.2.3 From 28e53bddf814485699a4142bc056fd37d4e11dd4 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 9 May 2007 02:34:22 -0700 Subject: unify flush_work/flush_work_keventd and rename it to cancel_work_sync flush_work(wq, work) doesn't need the first parameter, we can use cwq->wq (this was possible from the very beginnig, I missed this). So we can unify flush_work_keventd and flush_work. Also, rename flush_work() to cancel_work_sync() and fix all callers. Perhaps this is not the best name, but "flush_work" is really bad. 
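A sketch of the resulting teardown idiom (the e1000 and ipvs hunks below follow the same pattern; the my_dev structure and its members here are hypothetical):

static void my_dev_shutdown(struct my_dev *dev)
{
	cancel_delayed_work(&dev->poll);	/* kill a pending timer, if any */
	cancel_work_sync(&dev->poll.work);	/* wait for a running callback */
	cancel_work_sync(&dev->reset_task);	/* plain work_struct */
}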
(akpm: this is why the earlier patches bypassed maintainers) Signed-off-by: Oleg Nesterov Cc: Jeff Garzik Cc: "David S. Miller" Cc: Jens Axboe Cc: Tejun Heo Cc: Auke Kok , Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- block/ll_rw_blk.c | 2 +- drivers/ata/libata-core.c | 8 ++++---- drivers/net/e1000/e1000_main.c | 2 +- drivers/net/phy/phy.c | 4 ++-- drivers/net/tg3.c | 2 +- fs/aio.c | 4 ++-- include/linux/workqueue.h | 21 ++++++++++++--------- kernel/workqueue.c | 36 +++++++++++++++++------------------- net/ipv4/ipvs/ip_vs_ctl.c | 2 +- 9 files changed, 41 insertions(+), 40 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index c059767c552c..df506571ed60 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -3633,7 +3633,7 @@ EXPORT_SYMBOL(kblockd_schedule_work); void kblockd_flush_work(struct work_struct *work) { - flush_work(kblockd_workqueue, work); + cancel_work_sync(work); } EXPORT_SYMBOL(kblockd_flush_work); diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index b74e56caba6f..fef87dd70d17 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -1316,7 +1316,7 @@ void ata_port_flush_task(struct ata_port *ap) spin_unlock_irqrestore(ap->lock, flags); DPRINTK("flush #1\n"); - flush_work(ata_wq, &ap->port_task.work); /* akpm: seems unneeded */ + cancel_work_sync(&ap->port_task.work); /* akpm: seems unneeded */ /* * At this point, if a task is running, it's guaranteed to see @@ -1327,7 +1327,7 @@ void ata_port_flush_task(struct ata_port *ap) if (ata_msg_ctl(ap)) ata_port_printk(ap, KERN_DEBUG, "%s: flush #2\n", __FUNCTION__); - flush_work(ata_wq, &ap->port_task.work); + cancel_work_sync(&ap->port_task.work); } spin_lock_irqsave(ap->lock, flags); @@ -6475,9 +6475,9 @@ void ata_port_detach(struct ata_port *ap) /* Flush hotplug task. The sequence is similar to * ata_port_flush_task(). */ - flush_work(ata_aux_wq, &ap->hotplug_task.work); /* akpm: why? */ + cancel_work_sync(&ap->hotplug_task.work); /* akpm: why? */ cancel_delayed_work(&ap->hotplug_task); - flush_work(ata_aux_wq, &ap->hotplug_task.work); + cancel_work_sync(&ap->hotplug_task.work); skip_eh: /* remove the associated SCSI host */ diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index 397e25bdbfec..637ae8f68791 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -1214,7 +1214,7 @@ e1000_remove(struct pci_dev *pdev) int i; #endif - flush_work_keventd(&adapter->reset_task); + cancel_work_sync(&adapter->reset_task); e1000_release_manageability(adapter); diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index f445c465b14e..f71dab347667 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -663,9 +663,9 @@ int phy_stop_interrupts(struct phy_device *phydev) /* * Finish any pending work; we might have been scheduled to be called - * from keventd ourselves, but flush_work_keventd() handles that. + * from keventd ourselves, but cancel_work_sync() handles that. 
*/ - flush_work_keventd(&phydev->phy_queue); + cancel_work_sync(&phydev->phy_queue); free_irq(phydev->irq, phydev); diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 0c0f9c817321..923b9c725cc3 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -7386,7 +7386,7 @@ static int tg3_close(struct net_device *dev) { struct tg3 *tp = netdev_priv(dev); - flush_work_keventd(&tp->reset_task); + cancel_work_sync(&tp->reset_task); netif_stop_queue(dev); diff --git a/fs/aio.c b/fs/aio.c index d18690bb03e9..ac1c1587aa02 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -348,7 +348,7 @@ void fastcall exit_aio(struct mm_struct *mm) /* * Ensure we don't leave the ctx on the aio_wq */ - flush_work(aio_wq, &ctx->wq.work); + cancel_work_sync(&ctx->wq.work); if (1 != atomic_read(&ctx->users)) printk(KERN_DEBUG @@ -371,7 +371,7 @@ void fastcall __put_ioctx(struct kioctx *ctx) BUG_ON(ctx->reqs_active); cancel_delayed_work(&ctx->wq); - flush_work(aio_wq, &ctx->wq.work); + cancel_work_sync(&ctx->wq.work); aio_free_ring(ctx); mmdrop(ctx->mm); ctx->mm = NULL; diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index e1581dce5890..d555f31c0746 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -128,30 +128,33 @@ extern struct workqueue_struct *__create_workqueue(const char *name, extern void destroy_workqueue(struct workqueue_struct *wq); extern int FASTCALL(queue_work(struct workqueue_struct *wq, struct work_struct *work)); -extern int FASTCALL(queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *work, unsigned long delay)); +extern int FASTCALL(queue_delayed_work(struct workqueue_struct *wq, + struct delayed_work *work, unsigned long delay)); extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, - struct delayed_work *work, unsigned long delay); + struct delayed_work *work, unsigned long delay); + extern void FASTCALL(flush_workqueue(struct workqueue_struct *wq)); -extern void flush_work(struct workqueue_struct *wq, struct work_struct *work); -extern void flush_work_keventd(struct work_struct *work); +extern void flush_scheduled_work(void); extern int FASTCALL(schedule_work(struct work_struct *work)); -extern int FASTCALL(schedule_delayed_work(struct delayed_work *work, unsigned long delay)); - -extern int schedule_delayed_work_on(int cpu, struct delayed_work *work, unsigned long delay); +extern int FASTCALL(schedule_delayed_work(struct delayed_work *work, + unsigned long delay)); +extern int schedule_delayed_work_on(int cpu, struct delayed_work *work, + unsigned long delay); extern int schedule_on_each_cpu(work_func_t func); -extern void flush_scheduled_work(void); extern int current_is_keventd(void); extern int keventd_up(void); extern void init_workqueues(void); int execute_in_process_context(work_func_t fn, struct execute_work *); +extern void cancel_work_sync(struct work_struct *work); + /* * Kill off a pending schedule_delayed_work(). Note that the work callback * function may still be running on return from cancel_delayed_work(), unless * it returns 1 and the work doesn't re-arm itself. Run flush_workqueue() or - * flush_work() or cancel_work_sync() to wait on it. + * cancel_work_sync() to wait on it. 
*/ static inline int cancel_delayed_work(struct delayed_work *work) { diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 63885abf1ba0..c9ab4293904f 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -413,23 +413,23 @@ static void wait_on_work(struct cpu_workqueue_struct *cwq, } /** - * flush_work - block until a work_struct's callback has terminated - * @wq: the workqueue on which the work is queued + * cancel_work_sync - block until a work_struct's callback has terminated * @work: the work which is to be flushed * - * flush_work() will attempt to cancel the work if it is queued. If the work's - * callback appears to be running, flush_work() will block until it has - * completed. + * cancel_work_sync() will attempt to cancel the work if it is queued. If the + * work's callback appears to be running, cancel_work_sync() will block until + * it has completed. * - * flush_work() is designed to be used when the caller is tearing down data - * structures which the callback function operates upon. It is expected that, - * prior to calling flush_work(), the caller has arranged for the work to not - * be requeued. + * cancel_work_sync() is designed to be used when the caller is tearing down + * data structures which the callback function operates upon. It is expected + * that, prior to calling cancel_work_sync(), the caller has arranged for the + * work to not be requeued. */ -void flush_work(struct workqueue_struct *wq, struct work_struct *work) +void cancel_work_sync(struct work_struct *work) { - const cpumask_t *cpu_map = wq_cpu_map(wq); struct cpu_workqueue_struct *cwq; + struct workqueue_struct *wq; + const cpumask_t *cpu_map; int cpu; might_sleep(); @@ -448,10 +448,13 @@ void flush_work(struct workqueue_struct *wq, struct work_struct *work) work_clear_pending(work); spin_unlock_irq(&cwq->lock); + wq = cwq->wq; + cpu_map = wq_cpu_map(wq); + for_each_cpu_mask(cpu, *cpu_map) wait_on_work(per_cpu_ptr(wq->cpu_wq, cpu), work); } -EXPORT_SYMBOL_GPL(flush_work); +EXPORT_SYMBOL_GPL(cancel_work_sync); static struct workqueue_struct *keventd_wq; @@ -540,18 +543,13 @@ void flush_scheduled_work(void) } EXPORT_SYMBOL(flush_scheduled_work); -void flush_work_keventd(struct work_struct *work) -{ - flush_work(keventd_wq, work); -} -EXPORT_SYMBOL(flush_work_keventd); - /** * cancel_rearming_delayed_work - kill off a delayed work whose handler rearms the delayed work. * @dwork: the delayed work struct * * Note that the work callback function may still be running on return from - * cancel_delayed_work(). Run flush_workqueue() or flush_work() to wait on it. + * cancel_delayed_work(). Run flush_workqueue() or cancel_work_sync() to wait + * on it. */ void cancel_rearming_delayed_work(struct delayed_work *dwork) { diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index 342e836677a1..68fe1d4d0210 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -2387,7 +2387,7 @@ void ip_vs_control_cleanup(void) EnterFunction(2); ip_vs_trash_cleanup(); cancel_rearming_delayed_work(&defense_work); - flush_work_keventd(&defense_work.work); + cancel_work_sync(&defense_work.work); ip_vs_kill_estimator(&ip_vs_stats); unregister_sysctl_table(sysctl_header); proc_net_remove("ip_vs_stats"); -- cgit v1.2.3 From e3dfd2964ea86ae65f511b10d62ea54d46db3708 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 16 May 2007 22:11:11 -0700 Subject: make freezeable workqueues singlethread It is a known fact that freezeable multithreaded workqueues doesn't like CPU_DEAD. 
We keep them only for the incoming CPU-hotplug rework. Sadly, we can't just kill create_freezeable_workqueue() right now, make them singlethread. Signed-off-by: Oleg Nesterov Cc: "Rafael J. Wysocki" Cc: Gautham R Shenoy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/workqueue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index d555f31c0746..7eae8665ff59 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -122,7 +122,7 @@ extern struct workqueue_struct *__create_workqueue(const char *name, int singlethread, int freezeable); #define create_workqueue(name) __create_workqueue((name), 0, 0) -#define create_freezeable_workqueue(name) __create_workqueue((name), 0, 1) +#define create_freezeable_workqueue(name) __create_workqueue((name), 1, 1) #define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0) extern void destroy_workqueue(struct workqueue_struct *wq); -- cgit v1.2.3 From 223a10a98135da38d3668973d72cdffb1ced4b7d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 18 May 2007 00:36:42 -0700 Subject: revert "cancel_delayed_work: use del_timer() instead of del_timer_sync()" As pointed out by Jarek Poplawski, the patch [WORKQUEUE]: cancel_delayed_work: use del_timer() instead of del_timer_sync() commit: 071b638689464c6b39407025eedd810d5b5e6f5d was wrong, it was merged by mistake after that. From the changelog: after this patch: ... delayed_work_timer_fn->__queue_work() in progress. The latter doesn't differ from the caller's POV, it does make a difference if the caller calls flush_workqueue() after cancel_delayed_work(), in that case flush_workqueue() can miss this work_struct. Signed-off-by: Oleg Nesterov Cc: Jarek Poplawski Cc: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/workqueue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 7eae8665ff59..ce0719a2cfeb 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -160,7 +160,7 @@ static inline int cancel_delayed_work(struct delayed_work *work) { int ret; - ret = del_timer(&work->timer); + ret = del_timer_sync(&work->timer); if (ret) work_clear_pending(&work->work); return ret; -- cgit v1.2.3 From f5a421a4509a7e2dff11da0f01b0548f4f84d503 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 15 Jul 2007 23:41:44 -0700 Subject: rename cancel_rearming_delayed_work() to cancel_delayed_work_sync() Imho, the current naming of cancel_xxx workqueue functions is very confusing. cancel_delayed_work() cancel_rearming_delayed_work() cancel_rearming_delayed_workqueue() // obsolete cancel_work_sync() This looks as if the first 2 functions differ in "type" of their argument which is not true any longer, nowadays the difference is the behaviour. The semantics of cancel_rearming_delayed_work(dwork) was changed significantly, it doesn't require that dwork rearms itself, and cancels dwork synchronously. Rename it to cancel_delayed_work_sync(). This matches cancel_delayed_work() and cancel_work_sync(). Re-create cancel_rearming_delayed_work() as a simple inline obsolete wrapper, like cancel_rearming_delayed_workqueue(). 
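For illustration, a hypothetical self-rearming user after the rename; the point of cancel_delayed_work_sync() is that it kills the work reliably even though the handler keeps re-queueing itself:

static void dev_poll_fn(struct work_struct *work)
{
	struct my_dev *dev = container_of(work, struct my_dev, poll.work);

	/* ... do the periodic work ... */
	schedule_delayed_work(&dev->poll, HZ);	/* re-arms every time */
}

static void my_dev_stop(struct my_dev *dev)
{
	/* was: cancel_rearming_delayed_work(&dev->poll); */
	cancel_delayed_work_sync(&dev->poll);
}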
Signed-off-by: Oleg Nesterov Acked-by: Jarek Poplawski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/workqueue.h | 13 ++++++++++--- kernel/workqueue.c | 6 +++--- 2 files changed, 13 insertions(+), 6 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index ce0719a2cfeb..5c89ac6e7f55 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -166,14 +166,21 @@ static inline int cancel_delayed_work(struct delayed_work *work) return ret; } -extern void cancel_rearming_delayed_work(struct delayed_work *work); +extern void cancel_delayed_work_sync(struct delayed_work *work); -/* Obsolete. use cancel_rearming_delayed_work() */ +/* Obsolete. use cancel_delayed_work_sync() */ static inline void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, struct delayed_work *work) { - cancel_rearming_delayed_work(work); + cancel_delayed_work_sync(work); +} + +/* Obsolete. use cancel_delayed_work_sync() */ +static inline +void cancel_rearming_delayed_work(struct delayed_work *work) +{ + cancel_delayed_work_sync(work); } #endif diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 3bebf73be976..ad9656886daa 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -486,13 +486,13 @@ void cancel_work_sync(struct work_struct *work) EXPORT_SYMBOL_GPL(cancel_work_sync); /** - * cancel_rearming_delayed_work - reliably kill off a delayed work. + * cancel_delayed_work_sync - reliably kill off a delayed work. * @dwork: the delayed work struct * * It is possible to use this function if @dwork rearms itself via queue_work() * or queue_delayed_work(). See also the comment for cancel_work_sync(). */ -void cancel_rearming_delayed_work(struct delayed_work *dwork) +void cancel_delayed_work_sync(struct delayed_work *dwork) { while (!del_timer(&dwork->timer) && !try_to_grab_pending(&dwork->work)) @@ -500,7 +500,7 @@ void cancel_rearming_delayed_work(struct delayed_work *dwork) wait_on_work(&dwork->work); work_clear_pending(&dwork->work); } -EXPORT_SYMBOL(cancel_rearming_delayed_work); +EXPORT_SYMBOL(cancel_delayed_work_sync); static struct workqueue_struct *keventd_wq __read_mostly; -- cgit v1.2.3 From 1f1f642e2f092e37eb9038060eb0100c44f55a11 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 15 Jul 2007 23:41:44 -0700 Subject: make cancel_xxx_work_sync() return a boolean Change cancel_work_sync() and cancel_delayed_work_sync() to return a boolean indicating whether the work was actually cancelled. A zero return value means that the work was not pending/queued. Without that kind of change it is not possible to avoid flush_workqueue() sometimes, see the next patch as an example. Also, this patch unifies both functions and kills the (unlikely) busy-wait loop. 
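A hypothetical caller that makes use of the new return value; the function, structure and member names below are illustrative only:

static void my_dev_release(struct my_dev *dev)
{
	/*
	 * If the delayed work was still pending it never ran, so do its
	 * final cleanup here; no flush_workqueue() is needed to find out.
	 */
	if (cancel_delayed_work_sync(&dev->expire_work))
		my_dev_expire_now(dev);
}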
Signed-off-by: Oleg Nesterov Acked-by: Jarek Poplawski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/workqueue.h | 4 ++-- kernel/workqueue.c | 41 +++++++++++++++++++++++++++-------------- 2 files changed, 29 insertions(+), 16 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 5c89ac6e7f55..ce6badc98f6d 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -148,7 +148,7 @@ extern int keventd_up(void); extern void init_workqueues(void); int execute_in_process_context(work_func_t fn, struct execute_work *); -extern void cancel_work_sync(struct work_struct *work); +extern int cancel_work_sync(struct work_struct *work); /* * Kill off a pending schedule_delayed_work(). Note that the work callback @@ -166,7 +166,7 @@ static inline int cancel_delayed_work(struct delayed_work *work) return ret; } -extern void cancel_delayed_work_sync(struct delayed_work *work); +extern int cancel_delayed_work_sync(struct delayed_work *work); /* Obsolete. use cancel_delayed_work_sync() */ static inline diff --git a/kernel/workqueue.c b/kernel/workqueue.c index ad9656886daa..d7d3fa3072e5 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -382,16 +382,16 @@ void fastcall flush_workqueue(struct workqueue_struct *wq) EXPORT_SYMBOL_GPL(flush_workqueue); /* - * Upon a successful return, the caller "owns" WORK_STRUCT_PENDING bit, + * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit, * so this work can't be re-armed in any way. */ static int try_to_grab_pending(struct work_struct *work) { struct cpu_workqueue_struct *cwq; - int ret = 0; + int ret = -1; if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) - return 1; + return 0; /* * The queueing is in progress, or it is already queued. Try to @@ -457,10 +457,28 @@ static void wait_on_work(struct work_struct *work) wait_on_cpu_work(per_cpu_ptr(wq->cpu_wq, cpu), work); } +static int __cancel_work_timer(struct work_struct *work, + struct timer_list* timer) +{ + int ret; + + do { + ret = (timer && likely(del_timer(timer))); + if (!ret) + ret = try_to_grab_pending(work); + wait_on_work(work); + } while (unlikely(ret < 0)); + + work_clear_pending(work); + return ret; +} + /** * cancel_work_sync - block until a work_struct's callback has terminated * @work: the work which is to be flushed * + * Returns true if @work was pending. + * * cancel_work_sync() will cancel the work if it is queued. If the work's * callback appears to be running, cancel_work_sync() will block until it * has completed. @@ -476,12 +494,9 @@ static void wait_on_work(struct work_struct *work) * The caller must ensure that workqueue_struct on which this work was last * queued can't be destroyed before this function returns. */ -void cancel_work_sync(struct work_struct *work) +int cancel_work_sync(struct work_struct *work) { - while (!try_to_grab_pending(work)) - cpu_relax(); - wait_on_work(work); - work_clear_pending(work); + return __cancel_work_timer(work, NULL); } EXPORT_SYMBOL_GPL(cancel_work_sync); @@ -489,16 +504,14 @@ EXPORT_SYMBOL_GPL(cancel_work_sync); * cancel_delayed_work_sync - reliably kill off a delayed work. * @dwork: the delayed work struct * + * Returns true if @dwork was pending. + * * It is possible to use this function if @dwork rearms itself via queue_work() * or queue_delayed_work(). See also the comment for cancel_work_sync(). 
*/ -void cancel_delayed_work_sync(struct delayed_work *dwork) +int cancel_delayed_work_sync(struct delayed_work *dwork) { - while (!del_timer(&dwork->timer) && - !try_to_grab_pending(&dwork->work)) - cpu_relax(); - wait_on_work(&dwork->work); - work_clear_pending(&dwork->work); + return __cancel_work_timer(&dwork->work, &dwork->timer); } EXPORT_SYMBOL(cancel_delayed_work_sync); -- cgit v1.2.3 From 4e6045f134784f4b158b3c0f7a282b04bd816887 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 18 Oct 2007 23:39:55 -0700 Subject: workqueue: debug flushing deadlocks with lockdep In the following scenario: code path 1: my_function() -> lock(L1); ...; flush_workqueue(); ... code path 2: run_workqueue() -> my_work() -> ...; lock(L1); ... you can get a deadlock when my_work() is queued or running but my_function() has acquired L1 already. This patch adds a pseudo-lock to each workqueue to make lockdep warn about this scenario. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Johannes Berg Acked-by: Oleg Nesterov Acked-by: Ingo Molnar Acked-by: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/lockdep.h | 8 ++++++++ include/linux/workqueue.h | 49 ++++++++++++++++++++++++++++++++++++++++++++--- kernel/lockdep.c | 2 +- kernel/workqueue.c | 36 +++++++++++++++++++++++++++++++--- 4 files changed, 88 insertions(+), 7 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index f6279f68a827..4c4d236ded18 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -275,6 +275,14 @@ extern void lockdep_init_map(struct lockdep_map *lock, const char *name, lockdep_init_map(&(lock)->dep_map, #lock, \ (lock)->dep_map.key, sub) +/* + * To initialize a lockdep_map statically use this macro. + * Note that _name must not be NULL. + */ +#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ + { .name = (_name), .key = (void *)(_key), } + + /* * Acquire a lock. * diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index ce6badc98f6d..7daafdc2514b 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -8,6 +8,7 @@ #include #include #include +#include #include struct workqueue_struct; @@ -28,6 +29,9 @@ struct work_struct { #define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK) struct list_head entry; work_func_t func; +#ifdef CONFIG_LOCKDEP + struct lockdep_map lockdep_map; +#endif }; #define WORK_DATA_INIT() ATOMIC_LONG_INIT(0) @@ -41,10 +45,23 @@ struct execute_work { struct work_struct work; }; +#ifdef CONFIG_LOCKDEP +/* + * NB: because we have to copy the lockdep_map, setting _key + * here is required, otherwise it could get initialised to the + * copy of the lockdep_map! + */ +#define __WORK_INIT_LOCKDEP_MAP(n, k) \ + .lockdep_map = STATIC_LOCKDEP_MAP_INIT(n, k), +#else +#define __WORK_INIT_LOCKDEP_MAP(n, k) +#endif + #define __WORK_INITIALIZER(n, f) { \ .data = WORK_DATA_INIT(), \ .entry = { &(n).entry, &(n).entry }, \ .func = (f), \ + __WORK_INIT_LOCKDEP_MAP(#n, &(n)) \ } #define __DELAYED_WORK_INITIALIZER(n, f) { \ @@ -76,12 +93,24 @@ struct execute_work { * assignment of the work data initializer allows the compiler * to generate better code. 
*/ +#ifdef CONFIG_LOCKDEP +#define INIT_WORK(_work, _func) \ + do { \ + static struct lock_class_key __key; \ + \ + (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \ + lockdep_init_map(&(_work)->lockdep_map, #_work, &__key, 0);\ + INIT_LIST_HEAD(&(_work)->entry); \ + PREPARE_WORK((_work), (_func)); \ + } while (0) +#else #define INIT_WORK(_work, _func) \ do { \ (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \ INIT_LIST_HEAD(&(_work)->entry); \ PREPARE_WORK((_work), (_func)); \ } while (0) +#endif #define INIT_DELAYED_WORK(_work, _func) \ do { \ @@ -118,9 +147,23 @@ struct execute_work { clear_bit(WORK_STRUCT_PENDING, work_data_bits(work)) -extern struct workqueue_struct *__create_workqueue(const char *name, - int singlethread, - int freezeable); +extern struct workqueue_struct * +__create_workqueue_key(const char *name, int singlethread, + int freezeable, struct lock_class_key *key); + +#ifdef CONFIG_LOCKDEP +#define __create_workqueue(name, singlethread, freezeable) \ +({ \ + static struct lock_class_key __key; \ + \ + __create_workqueue_key((name), (singlethread), \ + (freezeable), &__key); \ +}) +#else +#define __create_workqueue(name, singlethread, freezeable) \ + __create_workqueue_key((name), (singlethread), (freezeable), NULL) +#endif + #define create_workqueue(name) __create_workqueue((name), 0, 0) #define create_freezeable_workqueue(name) __create_workqueue((name), 1, 1) #define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0) diff --git a/kernel/lockdep.c b/kernel/lockdep.c index a6f1ee9c92d9..b5392ff7e6a6 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -1521,7 +1521,7 @@ cache_hit: } static int validate_chain(struct task_struct *curr, struct lockdep_map *lock, - struct held_lock *hlock, int chain_head, u64 chain_key) + struct held_lock *hlock, int chain_head, u64 chain_key) { /* * Trylock needs to maintain the stack of held locks, but it diff --git a/kernel/workqueue.c b/kernel/workqueue.c index e080d1d744cc..d1916fea7108 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -32,6 +32,7 @@ #include #include #include +#include /* * The per-CPU workqueue (if single thread, we always use the first @@ -61,6 +62,9 @@ struct workqueue_struct { const char *name; int singlethread; int freezeable; /* Freeze threads during suspend */ +#ifdef CONFIG_LOCKDEP + struct lockdep_map lockdep_map; +#endif }; /* All the per-cpu workqueues on the system, for hotplug cpu to add/remove @@ -250,6 +254,17 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq) struct work_struct *work = list_entry(cwq->worklist.next, struct work_struct, entry); work_func_t f = work->func; +#ifdef CONFIG_LOCKDEP + /* + * It is permissible to free the struct work_struct + * from inside the function that is called from it, + * this we need to take into account for lockdep too. + * To avoid bogus "held lock freed" warnings as well + * as problems when looking into work->lockdep_map, + * make a copy and use that here. 
+ */ + struct lockdep_map lockdep_map = work->lockdep_map; +#endif cwq->current_work = work; list_del_init(cwq->worklist.next); @@ -257,7 +272,11 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq) BUG_ON(get_wq_data(work) != cwq); work_clear_pending(work); + lock_acquire(&cwq->wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_); + lock_acquire(&lockdep_map, 0, 0, 0, 2, _THIS_IP_); f(work); + lock_release(&lockdep_map, 1, _THIS_IP_); + lock_release(&cwq->wq->lockdep_map, 1, _THIS_IP_); if (unlikely(in_atomic() || lockdep_depth(current) > 0)) { printk(KERN_ERR "BUG: workqueue leaked lock or atomic: " @@ -376,6 +395,8 @@ void fastcall flush_workqueue(struct workqueue_struct *wq) int cpu; might_sleep(); + lock_acquire(&wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_); + lock_release(&wq->lockdep_map, 1, _THIS_IP_); for_each_cpu_mask(cpu, *cpu_map) flush_cpu_workqueue(per_cpu_ptr(wq->cpu_wq, cpu)); } @@ -446,6 +467,9 @@ static void wait_on_work(struct work_struct *work) might_sleep(); + lock_acquire(&work->lockdep_map, 0, 0, 0, 2, _THIS_IP_); + lock_release(&work->lockdep_map, 1, _THIS_IP_); + cwq = get_wq_data(work); if (!cwq) return; @@ -695,8 +719,10 @@ static void start_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) } } -struct workqueue_struct *__create_workqueue(const char *name, - int singlethread, int freezeable) +struct workqueue_struct *__create_workqueue_key(const char *name, + int singlethread, + int freezeable, + struct lock_class_key *key) { struct workqueue_struct *wq; struct cpu_workqueue_struct *cwq; @@ -713,6 +739,7 @@ struct workqueue_struct *__create_workqueue(const char *name, } wq->name = name; + lockdep_init_map(&wq->lockdep_map, name, key, 0); wq->singlethread = singlethread; wq->freezeable = freezeable; INIT_LIST_HEAD(&wq->list); @@ -741,7 +768,7 @@ struct workqueue_struct *__create_workqueue(const char *name, } return wq; } -EXPORT_SYMBOL_GPL(__create_workqueue); +EXPORT_SYMBOL_GPL(__create_workqueue_key); static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) { @@ -752,6 +779,9 @@ static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) if (cwq->thread == NULL) return; + lock_acquire(&cwq->wq->lockdep_map, 0, 0, 0, 2, _THIS_IP_); + lock_release(&cwq->wq->lockdep_map, 1, _THIS_IP_); + flush_cpu_workqueue(cwq); /* * If the caller is CPU_DEAD and cwq->worklist was not empty, -- cgit v1.2.3 From eb13ba873881abd5e15af784756a61af635e665e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 16 Jan 2008 09:51:58 +0100 Subject: lockdep: fix workqueue creation API lockdep interaction Dave Young reported warnings from lockdep that the workqueue API can sometimes try to register lockdep classes with the same key but different names. This is not permitted in lockdep. Unfortunately, I was unaware of that restriction when I wrote the code to debug workqueue problems with lockdep and used the workqueue name as the lockdep class name. This can obviously lead to the problem if the workqueue name is dynamic. This patch solves the problem by always using a constant name for the workqueue's lockdep class, namely either the constant name that was passed in or a string consisting of the variable name. 
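To illustrate the failure mode and the fix (the driver-style call below is hypothetical and only meant as a sketch; it is not taken from this patch):

	/* hypothetical caller -- dev->name differs for every device: */
	dev->wq = create_singlethread_workqueue(dev->name);

	/* before: the lockdep class was named after dev->name's runtime
	 *         value while all instances shared one static key, which
	 *         triggers the warning Dave Young reported
	 * after:  the class name is the constant string "dev->name" (the
	 *         stringified macro argument), so key and name both stay
	 *         constant across instances */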
Signed-off-by: Johannes Berg Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra --- include/linux/workqueue.h | 14 +++++++++++--- kernel/workqueue.c | 5 +++-- 2 files changed, 14 insertions(+), 5 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 7daafdc2514b..7f28c32d9aca 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -149,19 +149,27 @@ struct execute_work { extern struct workqueue_struct * __create_workqueue_key(const char *name, int singlethread, - int freezeable, struct lock_class_key *key); + int freezeable, struct lock_class_key *key, + const char *lock_name); #ifdef CONFIG_LOCKDEP #define __create_workqueue(name, singlethread, freezeable) \ ({ \ static struct lock_class_key __key; \ + const char *__lock_name; \ + \ + if (__builtin_constant_p(name)) \ + __lock_name = (name); \ + else \ + __lock_name = #name; \ \ __create_workqueue_key((name), (singlethread), \ - (freezeable), &__key); \ + (freezeable), &__key, \ + __lock_name); \ }) #else #define __create_workqueue(name, singlethread, freezeable) \ - __create_workqueue_key((name), (singlethread), (freezeable), NULL) + __create_workqueue_key((name), (singlethread), (freezeable), NULL, NULL) #endif #define create_workqueue(name) __create_workqueue((name), 0, 0) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 52d5e7c9a8e6..8db0b597509e 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -722,7 +722,8 @@ static void start_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) struct workqueue_struct *__create_workqueue_key(const char *name, int singlethread, int freezeable, - struct lock_class_key *key) + struct lock_class_key *key, + const char *lock_name) { struct workqueue_struct *wq; struct cpu_workqueue_struct *cwq; @@ -739,7 +740,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name, } wq->name = name; - lockdep_init_map(&wq->lockdep_map, name, key, 0); + lockdep_init_map(&wq->lockdep_map, lock_name, key, 0); wq->singlethread = singlethread; wq->freezeable = freezeable; INIT_LIST_HEAD(&wq->list); -- cgit v1.2.3 From b3c97528689619fc66569b30bf83d09d9929521a Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Wed, 13 Feb 2008 15:03:15 -0800 Subject: include/linux: Remove all users of FASTCALL() macro FASTCALL() is always expanded to empty, remove it. 
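For context, a rough sketch of the macro being removed (the historical i386 definition is quoted from memory of include/linux/linkage.h and is not part of this patch):

	/* historical 32-bit x86 definition (approximate): */
	#define FASTCALL(x)	x __attribute__((regparm(3)))
	/* current definition on all architectures -- a no-op wrapper: */
	#define FASTCALL(x)	x

	/* hence the purely mechanical conversion done below, e.g.: */
	extern void FASTCALL(flush_workqueue(struct workqueue_struct *wq));
	/* becomes */
	extern void flush_workqueue(struct workqueue_struct *wq);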
[akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Harvey Harrison Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/aio.h | 20 ++++++++++---------- include/linux/buffer_head.h | 6 +++--- include/linux/file.h | 16 ++++++++-------- include/linux/gfp.h | 15 +++++++-------- include/linux/interrupt.h | 8 ++++---- include/linux/mm.h | 4 ++-- include/linux/mutex-debug.h | 2 +- include/linux/namei.h | 6 +++--- include/linux/netdevice.h | 2 +- include/linux/pagemap.h | 10 +++++----- include/linux/pid.h | 21 ++++++++++----------- include/linux/rwsem-spinlock.h | 16 ++++++++-------- include/linux/sched.h | 14 +++++++------- include/linux/swap.h | 8 ++++---- include/linux/wait.h | 34 ++++++++++++++++------------------ include/linux/workqueue.h | 13 ++++++------- 16 files changed, 95 insertions(+), 100 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/aio.h b/include/linux/aio.h index 7ef8de662001..a9931e2e5624 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -206,21 +206,21 @@ struct kioctx { /* prototypes */ extern unsigned aio_max_size; -extern ssize_t FASTCALL(wait_on_sync_kiocb(struct kiocb *iocb)); -extern int FASTCALL(aio_put_req(struct kiocb *iocb)); -extern void FASTCALL(kick_iocb(struct kiocb *iocb)); -extern int FASTCALL(aio_complete(struct kiocb *iocb, long res, long res2)); -extern void FASTCALL(__put_ioctx(struct kioctx *ctx)); +extern ssize_t wait_on_sync_kiocb(struct kiocb *iocb); +extern int aio_put_req(struct kiocb *iocb); +extern void kick_iocb(struct kiocb *iocb); +extern int aio_complete(struct kiocb *iocb, long res, long res2); +extern void __put_ioctx(struct kioctx *ctx); struct mm_struct; -extern void FASTCALL(exit_aio(struct mm_struct *mm)); +extern void exit_aio(struct mm_struct *mm); extern struct kioctx *lookup_ioctx(unsigned long ctx_id); -extern int FASTCALL(io_submit_one(struct kioctx *ctx, - struct iocb __user *user_iocb, struct iocb *iocb)); +extern int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, + struct iocb *iocb); /* semi private, but used by the 32bit emulations: */ struct kioctx *lookup_ioctx(unsigned long ctx_id); -int FASTCALL(io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, - struct iocb *iocb)); +int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, + struct iocb *iocb); #define get_ioctx(kioctx) do { \ BUG_ON(atomic_read(&(kioctx)->users) <= 0); \ diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index e98801f06dcc..932eb02a2753 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -144,7 +144,7 @@ BUFFER_FNS(Unwritten, unwritten) * Declarations */ -void FASTCALL(mark_buffer_dirty(struct buffer_head *bh)); +void mark_buffer_dirty(struct buffer_head *bh); void init_buffer(struct buffer_head *, bh_end_io_t *, void *); void set_bh_page(struct buffer_head *bh, struct page *page, unsigned long offset); @@ -185,8 +185,8 @@ struct buffer_head *__bread(struct block_device *, sector_t block, unsigned size void invalidate_bh_lrus(void); struct buffer_head *alloc_buffer_head(gfp_t gfp_flags); void free_buffer_head(struct buffer_head * bh); -void FASTCALL(unlock_buffer(struct buffer_head *bh)); -void FASTCALL(__lock_buffer(struct buffer_head *bh)); +void unlock_buffer(struct buffer_head *bh); +void __lock_buffer(struct buffer_head *bh); void ll_rw_block(int, int, struct buffer_head * bh[]); int sync_dirty_buffer(struct buffer_head *bh); int submit_bh(int, struct buffer_head *); diff --git 
a/include/linux/file.h b/include/linux/file.h index 56023c74e9fd..7239baac81a9 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -59,8 +59,8 @@ struct files_struct { extern struct kmem_cache *filp_cachep; -extern void FASTCALL(__fput(struct file *)); -extern void FASTCALL(fput(struct file *)); +extern void __fput(struct file *); +extern void fput(struct file *); struct file_operations; struct vfsmount; @@ -77,13 +77,13 @@ static inline void fput_light(struct file *file, int fput_needed) fput(file); } -extern struct file * FASTCALL(fget(unsigned int fd)); -extern struct file * FASTCALL(fget_light(unsigned int fd, int *fput_needed)); -extern void FASTCALL(set_close_on_exec(unsigned int fd, int flag)); +extern struct file *fget(unsigned int fd); +extern struct file *fget_light(unsigned int fd, int *fput_needed); +extern void set_close_on_exec(unsigned int fd, int flag); extern void put_filp(struct file *); extern int get_unused_fd(void); extern int get_unused_fd_flags(int flags); -extern void FASTCALL(put_unused_fd(unsigned int fd)); +extern void put_unused_fd(unsigned int fd); struct kmem_cache; extern int expand_files(struct files_struct *, int nr); @@ -110,12 +110,12 @@ static inline struct file * fcheck_files(struct files_struct *files, unsigned in */ #define fcheck(fd) fcheck_files(current->files, fd) -extern void FASTCALL(fd_install(unsigned int fd, struct file * file)); +extern void fd_install(unsigned int fd, struct file *file); struct task_struct; struct files_struct *get_files_struct(struct task_struct *); -void FASTCALL(put_files_struct(struct files_struct *fs)); +void put_files_struct(struct files_struct *fs); void reset_files_struct(struct task_struct *, struct files_struct *); extern struct kmem_cache *files_cachep; diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 0c6ce515185d..164be9da3c1b 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -172,8 +172,7 @@ static inline void arch_free_page(struct page *page, int order) { } static inline void arch_alloc_page(struct page *page, int order) { } #endif -extern struct page * -FASTCALL(__alloc_pages(gfp_t, unsigned int, struct zonelist *)); +extern struct page *__alloc_pages(gfp_t, unsigned int, struct zonelist *); static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) @@ -209,8 +208,8 @@ extern struct page *alloc_page_vma(gfp_t gfp_mask, #endif #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) -extern unsigned long FASTCALL(__get_free_pages(gfp_t gfp_mask, unsigned int order)); -extern unsigned long FASTCALL(get_zeroed_page(gfp_t gfp_mask)); +extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order); +extern unsigned long get_zeroed_page(gfp_t gfp_mask); #define __get_free_page(gfp_mask) \ __get_free_pages((gfp_mask),0) @@ -218,10 +217,10 @@ extern unsigned long FASTCALL(get_zeroed_page(gfp_t gfp_mask)); #define __get_dma_pages(gfp_mask, order) \ __get_free_pages((gfp_mask) | GFP_DMA,(order)) -extern void FASTCALL(__free_pages(struct page *page, unsigned int order)); -extern void FASTCALL(free_pages(unsigned long addr, unsigned int order)); -extern void FASTCALL(free_hot_page(struct page *page)); -extern void FASTCALL(free_cold_page(struct page *page)); +extern void __free_pages(struct page *page, unsigned int order); +extern void free_pages(unsigned long addr, unsigned int order); +extern void free_hot_page(struct page *page); +extern void free_cold_page(struct page *page); #define __free_page(page) __free_pages((page), 0) #define 
free_page(addr) free_pages((addr),0) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index dea7598aeff4..f8ab4ce70564 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -273,8 +273,8 @@ asmlinkage void do_softirq(void); extern void open_softirq(int nr, void (*action)(struct softirq_action*), void *data); extern void softirq_init(void); #define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0) -extern void FASTCALL(raise_softirq_irqoff(unsigned int nr)); -extern void FASTCALL(raise_softirq(unsigned int nr)); +extern void raise_softirq_irqoff(unsigned int nr); +extern void raise_softirq(unsigned int nr); /* Tasklets --- multithreaded analogue of BHs. @@ -341,7 +341,7 @@ static inline void tasklet_unlock_wait(struct tasklet_struct *t) #define tasklet_unlock(t) do { } while (0) #endif -extern void FASTCALL(__tasklet_schedule(struct tasklet_struct *t)); +extern void __tasklet_schedule(struct tasklet_struct *t); static inline void tasklet_schedule(struct tasklet_struct *t) { @@ -349,7 +349,7 @@ static inline void tasklet_schedule(struct tasklet_struct *t) __tasklet_schedule(t); } -extern void FASTCALL(__tasklet_hi_schedule(struct tasklet_struct *t)); +extern void __tasklet_hi_schedule(struct tasklet_struct *t); static inline void tasklet_hi_schedule(struct tasklet_struct *t) { diff --git a/include/linux/mm.h b/include/linux/mm.h index e8abb3814209..26c7124b841a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -786,7 +786,7 @@ int __set_page_dirty_nobuffers(struct page *page); int __set_page_dirty_no_writeback(struct page *page); int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page); -int FASTCALL(set_page_dirty(struct page *page)); +int set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); int clear_page_dirty_for_io(struct page *page); @@ -829,7 +829,7 @@ extern void unregister_shrinker(struct shrinker *); int vma_wants_writenotify(struct vm_area_struct *vma); -extern pte_t *FASTCALL(get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl)); +extern pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl); #ifdef __PAGETABLE_PUD_FOLDED static inline int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, diff --git a/include/linux/mutex-debug.h b/include/linux/mutex-debug.h index 2537285e1064..731d77d6e155 100644 --- a/include/linux/mutex-debug.h +++ b/include/linux/mutex-debug.h @@ -18,6 +18,6 @@ do { \ __mutex_init((mutex), #mutex, &__key); \ } while (0) -extern void FASTCALL(mutex_destroy(struct mutex *lock)); +extern void mutex_destroy(struct mutex *lock); #endif diff --git a/include/linux/namei.h b/include/linux/namei.h index 4cb4f8d2f78d..c13e411491f4 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -62,13 +62,13 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; #define LOOKUP_ACCESS (0x0400) #define LOOKUP_CHDIR (0x0800) -extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *)); -extern int FASTCALL(__user_walk_fd(int dfd, const char __user *, unsigned, struct nameidata *)); +extern int __user_walk(const char __user *, unsigned, struct nameidata *); +extern int __user_walk_fd(int dfd, const char __user *, unsigned, struct nameidata *); #define user_path_walk(name,nd) \ __user_walk_fd(AT_FDCWD, name, LOOKUP_FOLLOW, nd) #define user_path_walk_link(name,nd) \ __user_walk_fd(AT_FDCWD, name, 0, nd) -extern int FASTCALL(path_lookup(const char *, unsigned, struct 
nameidata *)); +extern int path_lookup(const char *, unsigned, struct nameidata *); extern int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct nameidata *); extern void path_release(struct nameidata *); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 047d432bde55..7128a02f1d37 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -322,7 +322,7 @@ enum NAPI_STATE_DISABLE, /* Disable pending */ }; -extern void FASTCALL(__napi_schedule(struct napi_struct *n)); +extern void __napi_schedule(struct napi_struct *n); static inline int napi_disable_pending(struct napi_struct *n) { diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 4b62a105622b..d2fca802f809 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -156,10 +156,10 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma, return pgoff >> (PAGE_CACHE_SHIFT - PAGE_SHIFT); } -extern void FASTCALL(__lock_page(struct page *page)); -extern int FASTCALL(__lock_page_killable(struct page *page)); -extern void FASTCALL(__lock_page_nosync(struct page *page)); -extern void FASTCALL(unlock_page(struct page *page)); +extern void __lock_page(struct page *page); +extern int __lock_page_killable(struct page *page); +extern void __lock_page_nosync(struct page *page); +extern void unlock_page(struct page *page); /* * lock_page may only be called if we have the page's inode pinned. @@ -199,7 +199,7 @@ static inline void lock_page_nosync(struct page *page) * This is exported only for wait_on_page_locked/wait_on_page_writeback. * Never use this directly! */ -extern void FASTCALL(wait_on_page_bit(struct page *page, int bit_nr)); +extern void wait_on_page_bit(struct page *page, int bit_nr); /* * Wait for a page to be unlocked. diff --git a/include/linux/pid.h b/include/linux/pid.h index f84d532b5d23..c7980810eb09 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -79,10 +79,9 @@ static inline struct pid *get_pid(struct pid *pid) return pid; } -extern void FASTCALL(put_pid(struct pid *pid)); -extern struct task_struct *FASTCALL(pid_task(struct pid *pid, enum pid_type)); -extern struct task_struct *FASTCALL(get_pid_task(struct pid *pid, - enum pid_type)); +extern void put_pid(struct pid *pid); +extern struct task_struct *pid_task(struct pid *pid, enum pid_type); +extern struct task_struct *get_pid_task(struct pid *pid, enum pid_type); extern struct pid *get_task_pid(struct task_struct *task, enum pid_type type); @@ -90,11 +89,11 @@ extern struct pid *get_task_pid(struct task_struct *task, enum pid_type type); * attach_pid() and detach_pid() must be called with the tasklist_lock * write-held. 
*/ -extern int FASTCALL(attach_pid(struct task_struct *task, - enum pid_type type, struct pid *pid)); -extern void FASTCALL(detach_pid(struct task_struct *task, enum pid_type)); -extern void FASTCALL(transfer_pid(struct task_struct *old, - struct task_struct *new, enum pid_type)); +extern int attach_pid(struct task_struct *task, enum pid_type type, + struct pid *pid); +extern void detach_pid(struct task_struct *task, enum pid_type); +extern void transfer_pid(struct task_struct *old, struct task_struct *new, + enum pid_type); struct pid_namespace; extern struct pid_namespace init_pid_ns; @@ -109,7 +108,7 @@ extern struct pid_namespace init_pid_ns; * * see also find_task_by_pid() set in include/linux/sched.h */ -extern struct pid *FASTCALL(find_pid_ns(int nr, struct pid_namespace *ns)); +extern struct pid *find_pid_ns(int nr, struct pid_namespace *ns); extern struct pid *find_vpid(int nr); extern struct pid *find_pid(int nr); @@ -121,7 +120,7 @@ extern struct pid *find_ge_pid(int nr, struct pid_namespace *); int next_pidmap(struct pid_namespace *pid_ns, int last); extern struct pid *alloc_pid(struct pid_namespace *ns); -extern void FASTCALL(free_pid(struct pid *pid)); +extern void free_pid(struct pid *pid); /* * the helpers to get the pid's id seen from different namespaces diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h index 813cee13da0d..6c3c0f6c261f 100644 --- a/include/linux/rwsem-spinlock.h +++ b/include/linux/rwsem-spinlock.h @@ -60,14 +60,14 @@ do { \ __init_rwsem((sem), #sem, &__key); \ } while (0) -extern void FASTCALL(__down_read(struct rw_semaphore *sem)); -extern int FASTCALL(__down_read_trylock(struct rw_semaphore *sem)); -extern void FASTCALL(__down_write(struct rw_semaphore *sem)); -extern void FASTCALL(__down_write_nested(struct rw_semaphore *sem, int subclass)); -extern int FASTCALL(__down_write_trylock(struct rw_semaphore *sem)); -extern void FASTCALL(__up_read(struct rw_semaphore *sem)); -extern void FASTCALL(__up_write(struct rw_semaphore *sem)); -extern void FASTCALL(__downgrade_write(struct rw_semaphore *sem)); +extern void __down_read(struct rw_semaphore *sem); +extern int __down_read_trylock(struct rw_semaphore *sem); +extern void __down_write(struct rw_semaphore *sem); +extern void __down_write_nested(struct rw_semaphore *sem, int subclass); +extern int __down_write_trylock(struct rw_semaphore *sem); +extern void __up_read(struct rw_semaphore *sem); +extern void __up_write(struct rw_semaphore *sem); +extern void __downgrade_write(struct rw_semaphore *sem); static inline int rwsem_is_locked(struct rw_semaphore *sem) { diff --git a/include/linux/sched.h b/include/linux/sched.h index b9bb313fe1ae..e217d188a102 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -323,7 +323,7 @@ extern char __sched_text_start[], __sched_text_end[]; extern int in_sched_functions(unsigned long addr); #define MAX_SCHEDULE_TIMEOUT LONG_MAX -extern signed long FASTCALL(schedule_timeout(signed long timeout)); +extern signed long schedule_timeout(signed long timeout); extern signed long schedule_timeout_interruptible(signed long timeout); extern signed long schedule_timeout_killable(signed long timeout); extern signed long schedule_timeout_uninterruptible(signed long timeout); @@ -1648,10 +1648,10 @@ extern void release_uids(struct user_namespace *ns); extern void do_timer(unsigned long ticks); -extern int FASTCALL(wake_up_state(struct task_struct * tsk, unsigned int state)); -extern int FASTCALL(wake_up_process(struct task_struct * tsk)); -extern 
void FASTCALL(wake_up_new_task(struct task_struct * tsk, - unsigned long clone_flags)); +extern int wake_up_state(struct task_struct *tsk, unsigned int state); +extern int wake_up_process(struct task_struct *tsk); +extern void wake_up_new_task(struct task_struct *tsk, + unsigned long clone_flags); #ifdef CONFIG_SMP extern void kick_process(struct task_struct *tsk); #else @@ -1741,7 +1741,7 @@ static inline int sas_ss_flags(unsigned long sp) extern struct mm_struct * mm_alloc(void); /* mmdrop drops the mm and the page tables */ -extern void FASTCALL(__mmdrop(struct mm_struct *)); +extern void __mmdrop(struct mm_struct *); static inline void mmdrop(struct mm_struct * mm) { if (unlikely(atomic_dec_and_test(&mm->mm_count))) @@ -1925,7 +1925,7 @@ static inline int signal_pending(struct task_struct *p) return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); } -extern int FASTCALL(__fatal_signal_pending(struct task_struct *p)); +extern int __fatal_signal_pending(struct task_struct *p); static inline int fatal_signal_pending(struct task_struct *p) { diff --git a/include/linux/swap.h b/include/linux/swap.h index 3ca5c4bd6d3f..878459ae0454 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -171,10 +171,10 @@ extern unsigned int nr_free_pagecache_pages(void); /* linux/mm/swap.c */ -extern void FASTCALL(lru_cache_add(struct page *)); -extern void FASTCALL(lru_cache_add_active(struct page *)); -extern void FASTCALL(activate_page(struct page *)); -extern void FASTCALL(mark_page_accessed(struct page *)); +extern void lru_cache_add(struct page *); +extern void lru_cache_add_active(struct page *); +extern void activate_page(struct page *); +extern void mark_page_accessed(struct page *); extern void lru_add_drain(void); extern int lru_add_drain_all(void); extern int rotate_reclaimable_page(struct page *page); diff --git a/include/linux/wait.h b/include/linux/wait.h index 33a2aa9e02f2..0081147a9fe8 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -117,9 +117,9 @@ static inline int waitqueue_active(wait_queue_head_t *q) */ #define is_sync_wait(wait) (!(wait) || ((wait)->private)) -extern void FASTCALL(add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)); -extern void FASTCALL(add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait)); -extern void FASTCALL(remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)); +extern void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait); +extern void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait); +extern void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait); static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new) { @@ -141,16 +141,16 @@ static inline void __remove_wait_queue(wait_queue_head_t *head, list_del(&old->task_list); } -void FASTCALL(__wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key)); -extern void FASTCALL(__wake_up_locked(wait_queue_head_t *q, unsigned int mode)); -extern void FASTCALL(__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr)); -void FASTCALL(__wake_up_bit(wait_queue_head_t *, void *, int)); -int FASTCALL(__wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned)); -int FASTCALL(__wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned)); -void FASTCALL(wake_up_bit(void *, int)); -int FASTCALL(out_of_line_wait_on_bit(void *, int, int (*)(void *), unsigned)); -int FASTCALL(out_of_line_wait_on_bit_lock(void *, int, int (*)(void *), unsigned)); 
-wait_queue_head_t *FASTCALL(bit_waitqueue(void *, int)); +void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key); +extern void __wake_up_locked(wait_queue_head_t *q, unsigned int mode); +extern void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr); +void __wake_up_bit(wait_queue_head_t *, void *, int); +int __wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned); +int __wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned); +void wake_up_bit(void *, int); +int out_of_line_wait_on_bit(void *, int, int (*)(void *), unsigned); +int out_of_line_wait_on_bit_lock(void *, int, int (*)(void *), unsigned); +wait_queue_head_t *bit_waitqueue(void *, int); #define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL) #define wake_up_nr(x, nr) __wake_up(x, TASK_NORMAL, nr, NULL) @@ -437,11 +437,9 @@ extern long interruptible_sleep_on_timeout(wait_queue_head_t *q, /* * Waitqueues which are removed from the waitqueue_head at wakeup time */ -void FASTCALL(prepare_to_wait(wait_queue_head_t *q, - wait_queue_t *wait, int state)); -void FASTCALL(prepare_to_wait_exclusive(wait_queue_head_t *q, - wait_queue_t *wait, int state)); -void FASTCALL(finish_wait(wait_queue_head_t *q, wait_queue_t *wait)); +void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state); +void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state); +void finish_wait(wait_queue_head_t *q, wait_queue_t *wait); int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key); int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key); diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 7f28c32d9aca..542526c6e8ef 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -178,18 +178,17 @@ __create_workqueue_key(const char *name, int singlethread, extern void destroy_workqueue(struct workqueue_struct *wq); -extern int FASTCALL(queue_work(struct workqueue_struct *wq, struct work_struct *work)); -extern int FASTCALL(queue_delayed_work(struct workqueue_struct *wq, - struct delayed_work *work, unsigned long delay)); +extern int queue_work(struct workqueue_struct *wq, struct work_struct *work); +extern int queue_delayed_work(struct workqueue_struct *wq, + struct delayed_work *work, unsigned long delay); extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, struct delayed_work *work, unsigned long delay); -extern void FASTCALL(flush_workqueue(struct workqueue_struct *wq)); +extern void flush_workqueue(struct workqueue_struct *wq); extern void flush_scheduled_work(void); -extern int FASTCALL(schedule_work(struct work_struct *work)); -extern int FASTCALL(schedule_delayed_work(struct delayed_work *work, - unsigned long delay)); +extern int schedule_work(struct work_struct *work); +extern int schedule_delayed_work(struct delayed_work *work, unsigned long delay); extern int schedule_delayed_work_on(int cpu, struct delayed_work *work, unsigned long delay); extern int schedule_on_each_cpu(work_func_t func); -- cgit v1.2.3 From c1a220e7acf8ad2c03504891f4a70cd9c32c904b Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 23 Jul 2008 21:28:39 -0700 Subject: pm: introduce new interfaces schedule_work_on() and queue_work_on() This interface allows adding a job on a specific cpu. Although a work struct on a cpu will be scheduled to other cpu if the cpu dies, there is a recursion if a work task tries to offline the cpu it's running on. 
we need to schedule the task to a specific cpu in this case. http://bugzilla.kernel.org/show_bug.cgi?id=10897 [oleg@tv-sign.ru: cleanups] Signed-off-by: Zhang Rui Tested-by: Rus Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/workqueue.h | 3 +++ kernel/workqueue.c | 39 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 41 insertions(+), 1 deletion(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 542526c6e8ef..14d47120682b 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -179,6 +179,8 @@ __create_workqueue_key(const char *name, int singlethread, extern void destroy_workqueue(struct workqueue_struct *wq); extern int queue_work(struct workqueue_struct *wq, struct work_struct *work); +extern int queue_work_on(int cpu, struct workqueue_struct *wq, + struct work_struct *work); extern int queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *work, unsigned long delay); extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, @@ -188,6 +190,7 @@ extern void flush_workqueue(struct workqueue_struct *wq); extern void flush_scheduled_work(void); extern int schedule_work(struct work_struct *work); +extern int schedule_work_on(int cpu, struct work_struct *work); extern int schedule_delayed_work(struct delayed_work *work, unsigned long delay); extern int schedule_delayed_work_on(int cpu, struct delayed_work *work, unsigned long delay); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index a6d36346d10a..6fd158b21026 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -140,7 +140,6 @@ static void insert_work(struct cpu_workqueue_struct *cwq, wake_up(&cwq->more_work); } -/* Preempt must be disabled. */ static void __queue_work(struct cpu_workqueue_struct *cwq, struct work_struct *work) { @@ -175,6 +174,31 @@ int queue_work(struct workqueue_struct *wq, struct work_struct *work) } EXPORT_SYMBOL_GPL(queue_work); +/** + * queue_work_on - queue work on specific cpu + * @cpu: CPU number to execute work on + * @wq: workqueue to use + * @work: work to queue + * + * Returns 0 if @work was already on a queue, non-zero otherwise. + * + * We queue the work to a specific CPU, the caller must ensure it + * can't go away. 
+ */ +int +queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work) +{ + int ret = 0; + + if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) { + BUG_ON(!list_empty(&work->entry)); + __queue_work(wq_per_cpu(wq, cpu), work); + ret = 1; + } + return ret; +} +EXPORT_SYMBOL_GPL(queue_work_on); + static void delayed_work_timer_fn(unsigned long __data) { struct delayed_work *dwork = (struct delayed_work *)__data; @@ -553,6 +577,19 @@ int schedule_work(struct work_struct *work) } EXPORT_SYMBOL(schedule_work); +/* + * schedule_work_on - put work task on a specific cpu + * @cpu: cpu to put the work task on + * @work: job to be done + * + * This puts a job on a specific cpu + */ +int schedule_work_on(int cpu, struct work_struct *work) +{ + return queue_work_on(cpu, keventd_wq, work); +} +EXPORT_SYMBOL(schedule_work_on); + /** * schedule_delayed_work - put work task in global workqueue after delay + * @dwork: job to be done -- cgit v1.2.3 From db700897224b5ebdf852f2d38920ce428940d059 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 25 Jul 2008 01:47:49 -0700 Subject: workqueues: implement flush_work() Most users of flush_workqueue() can be changed to use cancel_work_sync(), but sometimes we really need to wait for the completion and cancelling is not an option. schedule_on_each_cpu() is a good example. Add the new helper, flush_work(work), which waits for the completion of the specific work_struct. More precisely, it "flushes" the result of the last queue_work() which is visible to the caller. For example, this code queue_work(wq, work); /* WINDOW */ queue_work(wq, work); flush_work(work); doesn't necessarily work "as expected". What can happen in the WINDOW above is - wq starts the execution of work->func() - the caller migrates to another CPU now, after the 2nd queue_work() this work is active on the previous CPU, and at the same time it is queued on another. In this case flush_work(work) may return before the first work->func() completes. It is trivial to add another helper int flush_work_sync(struct work_struct *work) { return flush_work(work) || wait_on_work(work); } which works "more correctly", but it has to iterate over all CPUs and thus it is much slower than flush_work().
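A hedged sketch of the kind of caller flush_work() is aimed at, loosely modelled on schedule_on_each_cpu(); the function name is made up and "func" is an assumed work_func_t, this is not the actual conversion:

	static int run_func_on_each_cpu(work_func_t func)
	{
		struct work_struct *works;
		int cpu;

		works = alloc_percpu(struct work_struct);
		if (!works)
			return -ENOMEM;

		get_online_cpus();
		for_each_online_cpu(cpu) {
			struct work_struct *work = per_cpu_ptr(works, cpu);

			INIT_WORK(work, func);
			schedule_work_on(cpu, work);
		}
		/* nothing requeues these items, so flush_work() suffices
		 * to guarantee every func has finished before returning */
		for_each_online_cpu(cpu)
			flush_work(per_cpu_ptr(works, cpu));
		put_online_cpus();

		free_percpu(works);
		return 0;
	}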
Signed-off-by: Oleg Nesterov Acked-by: Max Krasnyansky Acked-by: Jarek Poplawski Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/workqueue.h | 2 ++ kernel/workqueue.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 14d47120682b..5c158c477ac7 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -201,6 +201,8 @@ extern int keventd_up(void); extern void init_workqueues(void); int execute_in_process_context(work_func_t fn, struct execute_work *); +extern int flush_work(struct work_struct *work); + extern int cancel_work_sync(struct work_struct *work); /* diff --git a/kernel/workqueue.c b/kernel/workqueue.c index d9a2d65cc63e..ee41cf857d55 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -423,6 +423,52 @@ void flush_workqueue(struct workqueue_struct *wq) } EXPORT_SYMBOL_GPL(flush_workqueue); +/** + * flush_work - block until a work_struct's callback has terminated + * @work: the work which is to be flushed + * + * It is expected that, prior to calling flush_work(), the caller has + * arranged for the work to not be requeued, otherwise it doesn't make + * sense to use this function. + */ +int flush_work(struct work_struct *work) +{ + struct cpu_workqueue_struct *cwq; + struct list_head *prev; + struct wq_barrier barr; + + might_sleep(); + cwq = get_wq_data(work); + if (!cwq) + return 0; + + prev = NULL; + spin_lock_irq(&cwq->lock); + if (!list_empty(&work->entry)) { + /* + * See the comment near try_to_grab_pending()->smp_rmb(). + * If it was re-queued under us we are not going to wait. + */ + smp_rmb(); + if (unlikely(cwq != get_wq_data(work))) + goto out; + prev = &work->entry; + } else { + if (cwq->current_work != work) + goto out; + prev = &cwq->worklist; + } + insert_wq_barrier(cwq, &barr, prev->next); +out: + spin_unlock_irq(&cwq->lock); + if (!prev) + return 0; + + wait_for_completion(&barr.done); + return 1; +} +EXPORT_SYMBOL_GPL(flush_work); + /* * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit, * so this work can't be re-armed in any way. -- cgit v1.2.3 From 0d557dc97f4bb501f086a03d0f00b99a7855d794 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 13 Oct 2008 23:50:09 +0200 Subject: workqueue: introduce create_rt_workqueue create_rt_workqueue will create a real time prioritized workqueue. This is needed for the conversion of stop_machine to a workqueue based implementation. This patch adds yet another parameter to __create_workqueue_key to tell it that we want an rt workqueue. However it looks like we rather should have something like "int type" instead of singlethread, freezable and rt. 
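For illustration, a minimal sketch of the intended sort of user (the variable name and error handling are made up; stop_machine is only the stated motivation, this is not its actual conversion):

	/* workers of this queue run with SCHED_FIFO at MAX_RT_PRIO-1,
	 * per the change to create_workqueue_thread() below */
	struct workqueue_struct *stop_machine_wq;

	stop_machine_wq = create_rt_workqueue("kstop");
	if (!stop_machine_wq)
		return -ENOMEM;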
Signed-off-by: Heiko Carstens Signed-off-by: Rusty Russell Cc: Ingo Molnar --- include/linux/workqueue.h | 18 ++++++++++-------- kernel/workqueue.c | 7 ++++++- 2 files changed, 16 insertions(+), 9 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 5c158c477ac7..89a5a1231ffb 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -149,11 +149,11 @@ struct execute_work { extern struct workqueue_struct * __create_workqueue_key(const char *name, int singlethread, - int freezeable, struct lock_class_key *key, + int freezeable, int rt, struct lock_class_key *key, const char *lock_name); #ifdef CONFIG_LOCKDEP -#define __create_workqueue(name, singlethread, freezeable) \ +#define __create_workqueue(name, singlethread, freezeable, rt) \ ({ \ static struct lock_class_key __key; \ const char *__lock_name; \ @@ -164,17 +164,19 @@ __create_workqueue_key(const char *name, int singlethread, __lock_name = #name; \ \ __create_workqueue_key((name), (singlethread), \ - (freezeable), &__key, \ + (freezeable), (rt), &__key, \ __lock_name); \ }) #else -#define __create_workqueue(name, singlethread, freezeable) \ - __create_workqueue_key((name), (singlethread), (freezeable), NULL, NULL) +#define __create_workqueue(name, singlethread, freezeable, rt) \ + __create_workqueue_key((name), (singlethread), (freezeable), (rt), \ + NULL, NULL) #endif -#define create_workqueue(name) __create_workqueue((name), 0, 0) -#define create_freezeable_workqueue(name) __create_workqueue((name), 1, 1) -#define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0) +#define create_workqueue(name) __create_workqueue((name), 0, 0, 0) +#define create_rt_workqueue(name) __create_workqueue((name), 0, 0, 1) +#define create_freezeable_workqueue(name) __create_workqueue((name), 1, 1, 0) +#define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0, 0) extern void destroy_workqueue(struct workqueue_struct *wq); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 714afad46539..f928f2a87b9b 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -62,6 +62,7 @@ struct workqueue_struct { const char *name; int singlethread; int freezeable; /* Freeze threads during suspend */ + int rt; #ifdef CONFIG_LOCKDEP struct lockdep_map lockdep_map; #endif @@ -766,6 +767,7 @@ init_cpu_workqueue(struct workqueue_struct *wq, int cpu) static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) { + struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; struct workqueue_struct *wq = cwq->wq; const char *fmt = is_single_threaded(wq) ? 
"%s" : "%s/%d"; struct task_struct *p; @@ -781,7 +783,8 @@ static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) */ if (IS_ERR(p)) return PTR_ERR(p); - + if (cwq->wq->rt) + sched_setscheduler_nocheck(p, SCHED_FIFO, ¶m); cwq->thread = p; return 0; @@ -801,6 +804,7 @@ static void start_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) struct workqueue_struct *__create_workqueue_key(const char *name, int singlethread, int freezeable, + int rt, struct lock_class_key *key, const char *lock_name) { @@ -822,6 +826,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name, lockdep_init_map(&wq->lockdep_map, lock_name, key, 0); wq->singlethread = singlethread; wq->freezeable = freezeable; + wq->rt = rt; INIT_LIST_HEAD(&wq->list); if (singlethread) { -- cgit v1.2.3 From 2d3854a37e8b767a51aba38ed6d22817b0631e33 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 5 Nov 2008 13:39:10 +1100 Subject: cpumask: introduce new API, without changing anything Impact: introduce new APIs We want to deprecate cpumasks on the stack, as we are headed for gynormous numbers of CPUs. Eventually, we want to head towards an undefined 'struct cpumask' so they can never be declared on stack. 1) New cpumask functions which take pointers instead of copies. (cpus_* -> cpumask_*) 2) Several new helpers to reduce requirements for temporary cpumasks (cpumask_first_and, cpumask_next_and, cpumask_any_and) 3) Helpers for declaring cpumasks on or offstack for large NR_CPUS (cpumask_var_t, alloc_cpumask_var and free_cpumask_var) 4) 'struct cpumask' for explicitness and to mark new-style code. 5) Make iterator functions stop at nr_cpu_ids (a runtime constant), not NR_CPUS for time efficiency and for smaller dynamic allocations in future. 6) cpumask_copy() so we can allocate less than a full cpumask eventually (for alloc_cpumask_var), and so we can eliminate the 'struct cpumask' definition eventually. 7) work_on_cpu() helper for doing task on a CPU, rather than saving old cpumask for current thread and manipulating it. 8) smp_call_function_many() which is smp_call_function_mask() except taking a cpumask pointer. Note that this patch simply introduces the new functions and leaves the obsolescent ones in place. This is to simplify the transition patches. Signed-off-by: Rusty Russell Signed-off-by: Ingo Molnar --- include/linux/cpumask.h | 502 +++++++++++++++++++++++++++++++++++++++++++++- include/linux/smp.h | 9 + include/linux/workqueue.h | 8 + kernel/cpu.c | 3 + kernel/workqueue.c | 45 +++++ lib/cpumask.c | 73 +++++++ 6 files changed, 638 insertions(+), 2 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index d3219d73f8e6..c8e66619097b 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -5,6 +5,9 @@ * Cpumasks provide a bitmap suitable for representing the * set of CPU's in a system, one bit position per CPU number. * + * The new cpumask_ ops take a "struct cpumask *"; the old ones + * use cpumask_t. + * * See detailed comments in the file linux/bitmap.h describing the * data type on which these cpumasks are based. * @@ -31,7 +34,7 @@ * will span the entire range of NR_CPUS. * . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . 
* - * The available cpumask operations are: + * The obsolescent cpumask operations are: * * void cpu_set(cpu, mask) turn on bit 'cpu' in mask * void cpu_clear(cpu, mask) turn off bit 'cpu' in mask @@ -138,7 +141,7 @@ #include #include -typedef struct { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; +typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; extern cpumask_t _unused_cpumask_arg_; #define cpu_set(cpu, dst) __cpu_set((cpu), &(dst)) @@ -527,4 +530,499 @@ extern cpumask_t cpu_active_map; #define for_each_online_cpu(cpu) for_each_cpu_mask_nr((cpu), cpu_online_map) #define for_each_present_cpu(cpu) for_each_cpu_mask_nr((cpu), cpu_present_map) +/* These are the new versions of the cpumask operators: passed by pointer. + * The older versions will be implemented in terms of these, then deleted. */ +#define cpumask_bits(maskp) ((maskp)->bits) + +#if NR_CPUS <= BITS_PER_LONG +#define CPU_BITS_ALL \ +{ \ + [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ +} + +/* This produces more efficient code. */ +#define nr_cpumask_bits NR_CPUS + +#else /* NR_CPUS > BITS_PER_LONG */ + +#define CPU_BITS_ALL \ +{ \ + [0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL, \ + [BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \ +} + +#define nr_cpumask_bits nr_cpu_ids +#endif /* NR_CPUS > BITS_PER_LONG */ + +/* verify cpu argument to cpumask_* operators */ +static inline unsigned int cpumask_check(unsigned int cpu) +{ +#ifdef CONFIG_DEBUG_PER_CPU_MAPS + WARN_ON_ONCE(cpu >= nr_cpumask_bits); +#endif /* CONFIG_DEBUG_PER_CPU_MAPS */ + return cpu; +} + +#if NR_CPUS == 1 +/* Uniprocesor. */ +#define cpumask_first(src) ({ (void)(src); 0; }) +#define cpumask_next(n, src) ({ (void)(src); 1; }) +#define cpumask_next_zero(n, src) ({ (void)(src); 1; }) +#define cpumask_next_and(n, srcp, andp) ({ (void)(srcp), (void)(andp); 1; }) +#define cpumask_any_but(mask, cpu) ({ (void)(mask); (void)(cpu); 0; }) + +#define for_each_cpu(cpu, mask) \ + for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) +#define for_each_cpu_and(cpu, mask, and) \ + for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)and) +#else +/** + * cpumask_first - get the first cpu in a cpumask + * @srcp: the cpumask pointer + * + * Returns >= nr_cpu_ids if no cpus set. + */ +static inline unsigned int cpumask_first(const struct cpumask *srcp) +{ + return find_first_bit(cpumask_bits(srcp), nr_cpumask_bits); +} + +/** + * cpumask_next - get the next cpu in a cpumask + * @n: the cpu prior to the place to search (ie. return will be > @n) + * @srcp: the cpumask pointer + * + * Returns >= nr_cpu_ids if no further cpus set. + */ +static inline unsigned int cpumask_next(int n, const struct cpumask *srcp) +{ + /* -1 is a legal arg here. */ + if (n != -1) + cpumask_check(n); + return find_next_bit(cpumask_bits(srcp), nr_cpumask_bits, n+1); +} + +/** + * cpumask_next_zero - get the next unset cpu in a cpumask + * @n: the cpu prior to the place to search (ie. return will be > @n) + * @srcp: the cpumask pointer + * + * Returns >= nr_cpu_ids if no further cpus unset. + */ +static inline unsigned int cpumask_next_zero(int n, const struct cpumask *srcp) +{ + /* -1 is a legal arg here. 
*/ + if (n != -1) + cpumask_check(n); + return find_next_zero_bit(cpumask_bits(srcp), nr_cpumask_bits, n+1); +} + +int cpumask_next_and(int n, const struct cpumask *, const struct cpumask *); +int cpumask_any_but(const struct cpumask *mask, unsigned int cpu); + +#define for_each_cpu(cpu, mask) \ + for ((cpu) = -1; \ + (cpu) = cpumask_next((cpu), (mask)), \ + (cpu) < nr_cpu_ids;) +#define for_each_cpu_and(cpu, mask, and) \ + for ((cpu) = -1; \ + (cpu) = cpumask_next_and((cpu), (mask), (and)), \ + (cpu) < nr_cpu_ids;) +#endif /* SMP */ + +#define CPU_BITS_NONE \ +{ \ + [0 ... BITS_TO_LONGS(NR_CPUS)-1] = 0UL \ +} + +#define CPU_BITS_CPU0 \ +{ \ + [0] = 1UL \ +} + +/** + * cpumask_set_cpu - set a cpu in a cpumask + * @cpu: cpu number (< nr_cpu_ids) + * @dstp: the cpumask pointer + */ +static inline void cpumask_set_cpu(unsigned int cpu, struct cpumask *dstp) +{ + set_bit(cpumask_check(cpu), cpumask_bits(dstp)); +} + +/** + * cpumask_clear_cpu - clear a cpu in a cpumask + * @cpu: cpu number (< nr_cpu_ids) + * @dstp: the cpumask pointer + */ +static inline void cpumask_clear_cpu(int cpu, struct cpumask *dstp) +{ + clear_bit(cpumask_check(cpu), cpumask_bits(dstp)); +} + +/** + * cpumask_test_cpu - test for a cpu in a cpumask + * @cpu: cpu number (< nr_cpu_ids) + * @cpumask: the cpumask pointer + * + * No static inline type checking - see Subtlety (1) above. + */ +#define cpumask_test_cpu(cpu, cpumask) \ + test_bit(cpumask_check(cpu), (cpumask)->bits) + +/** + * cpumask_test_and_set_cpu - atomically test and set a cpu in a cpumask + * @cpu: cpu number (< nr_cpu_ids) + * @cpumask: the cpumask pointer + * + * test_and_set_bit wrapper for cpumasks. + */ +static inline int cpumask_test_and_set_cpu(int cpu, struct cpumask *cpumask) +{ + return test_and_set_bit(cpumask_check(cpu), cpumask_bits(cpumask)); +} + +/** + * cpumask_setall - set all cpus (< nr_cpu_ids) in a cpumask + * @dstp: the cpumask pointer + */ +static inline void cpumask_setall(struct cpumask *dstp) +{ + bitmap_fill(cpumask_bits(dstp), nr_cpumask_bits); +} + +/** + * cpumask_clear - clear all cpus (< nr_cpu_ids) in a cpumask + * @dstp: the cpumask pointer + */ +static inline void cpumask_clear(struct cpumask *dstp) +{ + bitmap_zero(cpumask_bits(dstp), nr_cpumask_bits); +} + +/** + * cpumask_and - *dstp = *src1p & *src2p + * @dstp: the cpumask result + * @src1p: the first input + * @src2p: the second input + */ +static inline void cpumask_and(struct cpumask *dstp, + const struct cpumask *src1p, + const struct cpumask *src2p) +{ + bitmap_and(cpumask_bits(dstp), cpumask_bits(src1p), + cpumask_bits(src2p), nr_cpumask_bits); +} + +/** + * cpumask_or - *dstp = *src1p | *src2p + * @dstp: the cpumask result + * @src1p: the first input + * @src2p: the second input + */ +static inline void cpumask_or(struct cpumask *dstp, const struct cpumask *src1p, + const struct cpumask *src2p) +{ + bitmap_or(cpumask_bits(dstp), cpumask_bits(src1p), + cpumask_bits(src2p), nr_cpumask_bits); +} + +/** + * cpumask_xor - *dstp = *src1p ^ *src2p + * @dstp: the cpumask result + * @src1p: the first input + * @src2p: the second input + */ +static inline void cpumask_xor(struct cpumask *dstp, + const struct cpumask *src1p, + const struct cpumask *src2p) +{ + bitmap_xor(cpumask_bits(dstp), cpumask_bits(src1p), + cpumask_bits(src2p), nr_cpumask_bits); +} + +/** + * cpumask_andnot - *dstp = *src1p & ~*src2p + * @dstp: the cpumask result + * @src1p: the first input + * @src2p: the second input + */ +static inline void cpumask_andnot(struct cpumask *dstp, + const struct 
cpumask *src1p, + const struct cpumask *src2p) +{ + bitmap_andnot(cpumask_bits(dstp), cpumask_bits(src1p), + cpumask_bits(src2p), nr_cpumask_bits); +} + +/** + * cpumask_complement - *dstp = ~*srcp + * @dstp: the cpumask result + * @srcp: the input to invert + */ +static inline void cpumask_complement(struct cpumask *dstp, + const struct cpumask *srcp) +{ + bitmap_complement(cpumask_bits(dstp), cpumask_bits(srcp), + nr_cpumask_bits); +} + +/** + * cpumask_equal - *src1p == *src2p + * @src1p: the first input + * @src2p: the second input + */ +static inline bool cpumask_equal(const struct cpumask *src1p, + const struct cpumask *src2p) +{ + return bitmap_equal(cpumask_bits(src1p), cpumask_bits(src2p), + nr_cpumask_bits); +} + +/** + * cpumask_intersects - (*src1p & *src2p) != 0 + * @src1p: the first input + * @src2p: the second input + */ +static inline bool cpumask_intersects(const struct cpumask *src1p, + const struct cpumask *src2p) +{ + return bitmap_intersects(cpumask_bits(src1p), cpumask_bits(src2p), + nr_cpumask_bits); +} + +/** + * cpumask_subset - (*src1p & ~*src2p) == 0 + * @src1p: the first input + * @src2p: the second input + */ +static inline int cpumask_subset(const struct cpumask *src1p, + const struct cpumask *src2p) +{ + return bitmap_subset(cpumask_bits(src1p), cpumask_bits(src2p), + nr_cpumask_bits); +} + +/** + * cpumask_empty - *srcp == 0 + * @srcp: the cpumask to that all cpus < nr_cpu_ids are clear. + */ +static inline bool cpumask_empty(const struct cpumask *srcp) +{ + return bitmap_empty(cpumask_bits(srcp), nr_cpumask_bits); +} + +/** + * cpumask_full - *srcp == 0xFFFFFFFF... + * @srcp: the cpumask to that all cpus < nr_cpu_ids are set. + */ +static inline bool cpumask_full(const struct cpumask *srcp) +{ + return bitmap_full(cpumask_bits(srcp), nr_cpumask_bits); +} + +/** + * cpumask_weight - Count of bits in *srcp + * @srcp: the cpumask to count bits (< nr_cpu_ids) in. + */ +static inline unsigned int cpumask_weight(const struct cpumask *srcp) +{ + return bitmap_weight(cpumask_bits(srcp), nr_cpumask_bits); +} + +/** + * cpumask_shift_right - *dstp = *srcp >> n + * @dstp: the cpumask result + * @srcp: the input to shift + * @n: the number of bits to shift by + */ +static inline void cpumask_shift_right(struct cpumask *dstp, + const struct cpumask *srcp, int n) +{ + bitmap_shift_right(cpumask_bits(dstp), cpumask_bits(srcp), n, + nr_cpumask_bits); +} + +/** + * cpumask_shift_left - *dstp = *srcp << n + * @dstp: the cpumask result + * @srcp: the input to shift + * @n: the number of bits to shift by + */ +static inline void cpumask_shift_left(struct cpumask *dstp, + const struct cpumask *srcp, int n) +{ + bitmap_shift_left(cpumask_bits(dstp), cpumask_bits(srcp), n, + nr_cpumask_bits); +} + +/** + * cpumask_copy - *dstp = *srcp + * @dstp: the result + * @srcp: the input cpumask + */ +static inline void cpumask_copy(struct cpumask *dstp, + const struct cpumask *srcp) +{ + bitmap_copy(cpumask_bits(dstp), cpumask_bits(srcp), nr_cpumask_bits); +} + +/** + * cpumask_any - pick a "random" cpu from *srcp + * @srcp: the input cpumask + * + * Returns >= nr_cpu_ids if no cpus set. + */ +#define cpumask_any(srcp) cpumask_first(srcp) + +/** + * cpumask_first_and - return the first cpu from *srcp1 & *srcp2 + * @src1p: the first input + * @src2p: the second input + * + * Returns >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and(). 
+ */ +#define cpumask_first_and(src1p, src2p) cpumask_next_and(-1, (src1p), (src2p)) + +/** + * cpumask_any_and - pick a "random" cpu from *mask1 & *mask2 + * @mask1: the first input cpumask + * @mask2: the second input cpumask + * + * Returns >= nr_cpu_ids if no cpus set. + */ +#define cpumask_any_and(mask1, mask2) cpumask_first_and((mask1), (mask2)) + +/** + * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask * + * @bitmap: the bitmap + * + * There are a few places where cpumask_var_t isn't appropriate and + * static cpumasks must be used (eg. very early boot), yet we don't + * expose the definition of 'struct cpumask'. + * + * This does the conversion, and can be used as a constant initializer. + */ +#define to_cpumask(bitmap) \ + ((struct cpumask *)(1 ? (bitmap) \ + : (void *)sizeof(__check_is_bitmap(bitmap)))) + +static inline int __check_is_bitmap(const unsigned long *bitmap) +{ + return 1; +} + +/** + * cpumask_size - size to allocate for a 'struct cpumask' in bytes + * + * This will eventually be a runtime variable, depending on nr_cpu_ids. + */ +static inline size_t cpumask_size(void) +{ + /* FIXME: Once all cpumask assignments are eliminated, this + * can be nr_cpumask_bits */ + return BITS_TO_LONGS(NR_CPUS) * sizeof(long); +} + +/* + * cpumask_var_t: struct cpumask for stack usage. + * + * Oh, the wicked games we play! In order to make kernel coding a + * little more difficult, we typedef cpumask_var_t to an array or a + * pointer: doing &mask on an array is a noop, so it still works. + * + * ie. + * cpumask_var_t tmpmask; + * if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL)) + * return -ENOMEM; + * + * ... use 'tmpmask' like a normal struct cpumask * ... + * + * free_cpumask_var(tmpmask); + */ +#ifdef CONFIG_CPUMASK_OFFSTACK +typedef struct cpumask *cpumask_var_t; + +bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags); +void alloc_bootmem_cpumask_var(cpumask_var_t *mask); +void free_cpumask_var(cpumask_var_t mask); + +#else +typedef struct cpumask cpumask_var_t[1]; + +static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) +{ + return true; +} + +static inline void alloc_bootmem_cpumask_var(cpumask_var_t *mask) +{ +} + +static inline void free_cpumask_var(cpumask_var_t mask) +{ +} +#endif /* CONFIG_CPUMASK_OFFSTACK */ + +/* The pointer versions of the maps, these will become the primary versions. */ +#define cpu_possible_mask ((const struct cpumask *)&cpu_possible_map) +#define cpu_online_mask ((const struct cpumask *)&cpu_online_map) +#define cpu_present_mask ((const struct cpumask *)&cpu_present_map) +#define cpu_active_mask ((const struct cpumask *)&cpu_active_map) + +/* It's common to want to use cpu_all_mask in struct member initializers, + * so it has to refer to an address rather than a pointer. */ +extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS); +#define cpu_all_mask to_cpumask(cpu_all_bits) + +/* First bits of cpu_bit_bitmap are in fact unset. 
*/ +#define cpu_none_mask to_cpumask(cpu_bit_bitmap[0]) + +/* Wrappers for arch boot code to manipulate normally-constant masks */ +static inline void set_cpu_possible(unsigned int cpu, bool possible) +{ + if (possible) + cpumask_set_cpu(cpu, &cpu_possible_map); + else + cpumask_clear_cpu(cpu, &cpu_possible_map); +} + +static inline void set_cpu_present(unsigned int cpu, bool present) +{ + if (present) + cpumask_set_cpu(cpu, &cpu_present_map); + else + cpumask_clear_cpu(cpu, &cpu_present_map); +} + +static inline void set_cpu_online(unsigned int cpu, bool online) +{ + if (online) + cpumask_set_cpu(cpu, &cpu_online_map); + else + cpumask_clear_cpu(cpu, &cpu_online_map); +} + +static inline void set_cpu_active(unsigned int cpu, bool active) +{ + if (active) + cpumask_set_cpu(cpu, &cpu_active_map); + else + cpumask_clear_cpu(cpu, &cpu_active_map); +} + +static inline void init_cpu_present(const struct cpumask *src) +{ + cpumask_copy(&cpu_present_map, src); +} + +static inline void init_cpu_possible(const struct cpumask *src) +{ + cpumask_copy(&cpu_possible_map, src); +} + +static inline void init_cpu_online(const struct cpumask *src) +{ + cpumask_copy(&cpu_online_map, src); +} #endif /* __LINUX_CPUMASK_H */ diff --git a/include/linux/smp.h b/include/linux/smp.h index 2e4d58b26c06..3f9a60043a97 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -64,8 +64,17 @@ extern void smp_cpus_done(unsigned int max_cpus); * Call a function on all other processors */ int smp_call_function(void(*func)(void *info), void *info, int wait); +/* Deprecated: use smp_call_function_many() which uses a cpumask ptr. */ int smp_call_function_mask(cpumask_t mask, void(*func)(void *info), void *info, int wait); + +static inline void smp_call_function_many(const struct cpumask *mask, + void (*func)(void *info), void *info, + int wait) +{ + smp_call_function_mask(*mask, func, info, wait); +} + int smp_call_function_single(int cpuid, void (*func) (void *info), void *info, int wait); void __smp_call_function_single(int cpuid, struct call_single_data *data); diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 89a5a1231ffb..b36291130f22 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -240,4 +240,12 @@ void cancel_rearming_delayed_work(struct delayed_work *work) cancel_delayed_work_sync(work); } +#ifndef CONFIG_SMP +static inline long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) +{ + return fn(arg); +} +#else +long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg); +#endif /* CONFIG_SMP */ #endif diff --git a/kernel/cpu.c b/kernel/cpu.c index 86d49045daed..5a732c5ef08b 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -499,3 +499,6 @@ const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = { #endif }; EXPORT_SYMBOL_GPL(cpu_bit_bitmap); + +const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL; +EXPORT_SYMBOL(cpu_all_bits); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index f928f2a87b9b..d4dc69ddebd7 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -970,6 +970,51 @@ undo: return ret; } +#ifdef CONFIG_SMP +struct work_for_cpu { + struct work_struct work; + long (*fn)(void *); + void *arg; + long ret; +}; + +static void do_work_for_cpu(struct work_struct *w) +{ + struct work_for_cpu *wfc = container_of(w, struct work_for_cpu, work); + + wfc->ret = wfc->fn(wfc->arg); +} + +/** + * work_on_cpu - run a function in user context on a particular cpu + * @cpu: the cpu to run on + * @fn: the function to run + * 
@arg: the function arg + * + * This will return -EINVAL in the cpu is not online, or the return value + * of @fn otherwise. + */ +long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) +{ + struct work_for_cpu wfc; + + INIT_WORK(&wfc.work, do_work_for_cpu); + wfc.fn = fn; + wfc.arg = arg; + get_online_cpus(); + if (unlikely(!cpu_online(cpu))) + wfc.ret = -EINVAL; + else { + schedule_work_on(cpu, &wfc.work); + flush_work(&wfc.work); + } + put_online_cpus(); + + return wfc.ret; +} +EXPORT_SYMBOL_GPL(work_on_cpu); +#endif /* CONFIG_SMP */ + void __init init_workqueues(void) { cpu_populated_map = cpu_online_map; diff --git a/lib/cpumask.c b/lib/cpumask.c index 5f97dc25ef9c..5ceb4211c834 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -2,6 +2,7 @@ #include #include #include +#include int __first_cpu(const cpumask_t *srcp) { @@ -35,3 +36,75 @@ int __any_online_cpu(const cpumask_t *mask) return cpu; } EXPORT_SYMBOL(__any_online_cpu); + +/** + * cpumask_next_and - get the next cpu in *src1p & *src2p + * @n: the cpu prior to the place to search (ie. return will be > @n) + * @src1p: the first cpumask pointer + * @src2p: the second cpumask pointer + * + * Returns >= nr_cpu_ids if no further cpus set in both. + */ +int cpumask_next_and(int n, const struct cpumask *src1p, + const struct cpumask *src2p) +{ + while ((n = cpumask_next(n, src1p)) < nr_cpu_ids) + if (cpumask_test_cpu(n, src2p)) + break; + return n; +} +EXPORT_SYMBOL(cpumask_next_and); + +/** + * cpumask_any_but - return a "random" in a cpumask, but not this one. + * @mask: the cpumask to search + * @cpu: the cpu to ignore. + * + * Often used to find any cpu but smp_processor_id() in a mask. + * Returns >= nr_cpu_ids if no cpus set. + */ +int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) +{ + unsigned int i; + + for_each_cpu(i, mask) + if (i != cpu) + break; + return i; +} + +/* These are not inline because of header tangles. */ +#ifdef CONFIG_CPUMASK_OFFSTACK +bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) +{ + if (likely(slab_is_available())) + *mask = kmalloc(cpumask_size(), flags); + else { +#ifdef CONFIG_DEBUG_PER_CPU_MAPS + printk(KERN_ERR + "=> alloc_cpumask_var: kmalloc not available!\n"); + dump_stack(); +#endif + *mask = NULL; + } +#ifdef CONFIG_DEBUG_PER_CPU_MAPS + if (!*mask) { + printk(KERN_ERR "=> alloc_cpumask_var: failed!\n"); + dump_stack(); + } +#endif + return *mask != NULL; +} +EXPORT_SYMBOL(alloc_cpumask_var); + +void __init alloc_bootmem_cpumask_var(cpumask_var_t *mask) +{ + *mask = alloc_bootmem(cpumask_size()); +} + +void free_cpumask_var(cpumask_var_t mask) +{ + kfree(mask); +} +EXPORT_SYMBOL(free_cpumask_var); +#endif -- cgit v1.2.3 From 6d612b0f943289856c6e8186c564cda922cd040e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 12 Jan 2009 12:52:23 +0100 Subject: locking, hpet: annotate false positive warning Alexander Beregalov reported that this warning is caused by the HPET code: > hpet0: at MMIO 0xfed00000, IRQs 2, 8, 0 > hpet0: 3 comparators, 64-bit 14.318180 MHz counter > ODEBUG: object is on stack, but not annotated > ------------[ cut here ]------------ > WARNING: at lib/debugobjects.c:251 __debug_object_init+0x2a4/0x352() > Bisected down to 26afe5f2fbf06ea0765aaa316640c4dd472310c0 > (x86: HPET_MSI Initialise per-cpu HPET timers) The commit is fine - but the on-stack workqueue entry needs annotation. 
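
For reference, the resulting annotated on-stack pattern looks roughly like the sketch below; this is a minimal illustration only, and the struct, callback and field names are made up rather than taken from hpet.c:

	#include <linux/kernel.h>
	#include <linux/workqueue.h>
	#include <linux/completion.h>

	struct onstack_work {				/* illustrative type */
		struct delayed_work work;
		struct completion done;
	};

	static void onstack_work_fn(struct work_struct *w)	/* illustrative callback */
	{
		struct onstack_work *ow = container_of(w, struct onstack_work, work.work);

		/* ... per-cpu setup ... */
		complete(&ow->done);
	}

	static void run_onstack_work(int cpu)
	{
		struct onstack_work ow;

		/* the _ON_STACK variant keeps the object/timer debugging code happy */
		INIT_DELAYED_WORK_ON_STACK(&ow.work, onstack_work_fn);
		init_completion(&ow.done);
		schedule_delayed_work_on(cpu, &ow.work, 0);
		wait_for_completion(&ow.done);
	}
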
Reported-and-bisected-by: Alexander Beregalov Signed-off-by: Peter Zijlstra Tested-by: Alexander Beregalov Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 2 +- include/linux/workqueue.h | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux/workqueue.h') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index cd759ad90690..bb2e0f0975ae 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -628,7 +628,7 @@ static int hpet_cpuhp_notify(struct notifier_block *n, switch (action & 0xf) { case CPU_ONLINE: - INIT_DELAYED_WORK(&work.work, hpet_work); + INIT_DELAYED_WORK_ON_STACK(&work.work, hpet_work); init_completion(&work.complete); /* FIXME: add schedule_work_on() */ schedule_delayed_work_on(cpu, &work.work, 0); diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index b36291130f22..47151c8495aa 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -118,6 +118,12 @@ struct execute_work { init_timer(&(_work)->timer); \ } while (0) +#define INIT_DELAYED_WORK_ON_STACK(_work, _func) \ + do { \ + INIT_WORK(&(_work)->work, (_func)); \ + init_timer_on_stack(&(_work)->timer); \ + } while (0) + #define INIT_DELAYED_WORK_DEFERRABLE(_work, _func) \ do { \ INIT_WORK(&(_work)->work, (_func)); \ -- cgit v1.2.3 From 336f6c322d87806ef93afad6308ac65083a865e5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 22 Jan 2009 09:50:44 +0100 Subject: debugobjects: add and use INIT_WORK_ON_STACK Impact: Fix debugobjects warning debugobject enabled kernels spit out a warning in hpet code due to a workqueue which is initialized on stack. Add INIT_WORK_ON_STACK() which calls init_timer_on_stack() and use it in hpet. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/hpet.c | 3 ++- include/linux/workqueue.h | 6 ++++++ 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux/workqueue.h') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index cd759ad90690..64d5ad0b8add 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -628,11 +628,12 @@ static int hpet_cpuhp_notify(struct notifier_block *n, switch (action & 0xf) { case CPU_ONLINE: - INIT_DELAYED_WORK(&work.work, hpet_work); + INIT_DELAYED_WORK_ON_STACK(&work.work, hpet_work); init_completion(&work.complete); /* FIXME: add schedule_work_on() */ schedule_delayed_work_on(cpu, &work.work, 0); wait_for_completion(&work.complete); + destroy_timer_on_stack(&work.work.timer); break; case CPU_DEAD: if (hdev) { diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index b36291130f22..20b59eb1facd 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -124,6 +124,12 @@ struct execute_work { init_timer_deferrable(&(_work)->timer); \ } while (0) +#define INIT_DELAYED_WORK_ON_STACK(_work, _func) \ + do { \ + INIT_WORK(&(_work)->work, (_func)); \ + init_timer_on_stack(&(_work)->timer); \ + } while (0) + /** * work_pending - Find out whether a work item is currently pending * @work: The work item in question -- cgit v1.2.3 From bf6aede712334d7338d5c47a5ee5ba3883c82a61 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 2 Apr 2009 16:56:54 -0700 Subject: workqueue: add to_delayed_work() helper function It is a fairly common operation to have a pointer to a work and to need a pointer to the delayed work it is contained in. In particular, all delayed works which want to rearm themselves will have to do that. So it would seem fair to offer a helper function for this operation. 
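
For illustration, a self-rearming handler, the common case mentioned above, would then read something like this minimal sketch (my_device, poll_work and my_poll are made-up names):

	#include <linux/workqueue.h>
	#include <linux/jiffies.h>

	struct my_device {				/* illustrative */
		struct delayed_work poll_work;
		/* ... */
	};

	static void my_poll(struct work_struct *work)
	{
		struct delayed_work *dwork = to_delayed_work(work);
		struct my_device *dev = container_of(dwork, struct my_device, poll_work);

		/* ... poll the hardware behind 'dev' ... */

		schedule_delayed_work(dwork, HZ);	/* rearm one second from now */
	}

This replaces the open-coded container_of(work, struct delayed_work, work) seen throughout the conversions below.
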
[akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Jean Delvare Acked-by: Ingo Molnar Cc: "David S. Miller" Cc: Herbert Xu Cc: Benjamin Herrenschmidt Cc: Martin Schwidefsky Cc: Greg KH Cc: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/vio.c | 2 +- drivers/crypto/hifn_795x.c | 2 +- drivers/input/mouse/hgpk.c | 2 +- drivers/net/dm9000.c | 2 +- drivers/net/mlx4/en_netdev.c | 2 +- drivers/net/mlx4/en_rx.c | 2 +- drivers/net/mlx4/sense.c | 2 +- drivers/net/phy/phy.c | 3 +-- drivers/s390/scsi/zfcp_fc.c | 2 +- drivers/staging/rtl8187se/ieee80211/ieee80211_softmac.c | 8 ++++---- drivers/staging/rtl8187se/r8180_core.c | 8 ++++---- drivers/usb/wusbcore/devconnect.c | 2 +- include/linux/workqueue.h | 5 +++++ mm/slab.c | 3 +-- 14 files changed, 24 insertions(+), 21 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index d3694498f3af..819e59f6f7c7 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -482,7 +482,7 @@ static void vio_cmo_balance(struct work_struct *work) cmo->excess.size = cmo->entitled - cmo->reserve.size; cmo->excess.free = cmo->excess.size - need; - cancel_delayed_work(container_of(work, struct delayed_work, work)); + cancel_delayed_work(to_delayed_work(work)); spin_unlock_irqrestore(&vio_cmo.lock, flags); } diff --git a/drivers/crypto/hifn_795x.c b/drivers/crypto/hifn_795x.c index 0c79fe7f1567..4d85402a9e4a 100644 --- a/drivers/crypto/hifn_795x.c +++ b/drivers/crypto/hifn_795x.c @@ -1882,7 +1882,7 @@ static void hifn_clear_rings(struct hifn_device *dev, int error) static void hifn_work(struct work_struct *work) { - struct delayed_work *dw = container_of(work, struct delayed_work, work); + struct delayed_work *dw = to_delayed_work(work); struct hifn_device *dev = container_of(dw, struct hifn_device, work); unsigned long flags; int reset = 0; diff --git a/drivers/input/mouse/hgpk.c b/drivers/input/mouse/hgpk.c index 81e6ebf323e9..55cd0fa68339 100644 --- a/drivers/input/mouse/hgpk.c +++ b/drivers/input/mouse/hgpk.c @@ -381,7 +381,7 @@ static void hgpk_disconnect(struct psmouse *psmouse) static void hgpk_recalib_work(struct work_struct *work) { - struct delayed_work *w = container_of(work, struct delayed_work, work); + struct delayed_work *w = to_delayed_work(work); struct hgpk_data *priv = container_of(w, struct hgpk_data, recalib_wq); struct psmouse *psmouse = priv->psmouse; diff --git a/drivers/net/dm9000.c b/drivers/net/dm9000.c index 254ec62b5f58..d8350860c0f8 100644 --- a/drivers/net/dm9000.c +++ b/drivers/net/dm9000.c @@ -559,7 +559,7 @@ static void dm9000_show_carrier(board_info_t *db, static void dm9000_poll_work(struct work_struct *w) { - struct delayed_work *dw = container_of(w, struct delayed_work, work); + struct delayed_work *dw = to_delayed_work(w); board_info_t *db = container_of(dw, board_info_t, phy_poll); struct net_device *ndev = db->ndev; diff --git a/drivers/net/mlx4/en_netdev.c b/drivers/net/mlx4/en_netdev.c index 9f6644a44030..303c23de6cac 100644 --- a/drivers/net/mlx4/en_netdev.c +++ b/drivers/net/mlx4/en_netdev.c @@ -505,7 +505,7 @@ out: static void mlx4_en_do_get_stats(struct work_struct *work) { - struct delayed_work *delay = container_of(work, struct delayed_work, work); + struct delayed_work *delay = to_delayed_work(work); struct mlx4_en_priv *priv = container_of(delay, struct mlx4_en_priv, stats_task); struct mlx4_en_dev *mdev = priv->mdev; diff --git a/drivers/net/mlx4/en_rx.c b/drivers/net/mlx4/en_rx.c 
index a4130e764991..7e40741fb7d8 100644 --- a/drivers/net/mlx4/en_rx.c +++ b/drivers/net/mlx4/en_rx.c @@ -298,7 +298,7 @@ static void mlx4_en_free_rx_buf(struct mlx4_en_priv *priv, void mlx4_en_rx_refill(struct work_struct *work) { - struct delayed_work *delay = container_of(work, struct delayed_work, work); + struct delayed_work *delay = to_delayed_work(work); struct mlx4_en_priv *priv = container_of(delay, struct mlx4_en_priv, refill_task); struct mlx4_en_dev *mdev = priv->mdev; diff --git a/drivers/net/mlx4/sense.c b/drivers/net/mlx4/sense.c index 6d5089ecb5af..f36ae691cab3 100644 --- a/drivers/net/mlx4/sense.c +++ b/drivers/net/mlx4/sense.c @@ -103,7 +103,7 @@ void mlx4_do_sense_ports(struct mlx4_dev *dev, static void mlx4_sense_port(struct work_struct *work) { - struct delayed_work *delay = container_of(work, struct delayed_work, work); + struct delayed_work *delay = to_delayed_work(work); struct mlx4_sense *sense = container_of(delay, struct mlx4_sense, sense_poll); struct mlx4_dev *dev = sense->dev; diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 58b73b08dde0..3ff1f425f1bb 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -757,8 +757,7 @@ EXPORT_SYMBOL(phy_start); */ static void phy_state_machine(struct work_struct *work) { - struct delayed_work *dwork = - container_of(work, struct delayed_work, work); + struct delayed_work *dwork = to_delayed_work(work); struct phy_device *phydev = container_of(dwork, struct phy_device, state_queue); int needs_aneg = 0; diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c index aab8123c5966..e8d032b9dfbd 100644 --- a/drivers/s390/scsi/zfcp_fc.c +++ b/drivers/s390/scsi/zfcp_fc.c @@ -94,7 +94,7 @@ static int zfcp_wka_port_get(struct zfcp_wka_port *wka_port) static void zfcp_wka_port_offline(struct work_struct *work) { - struct delayed_work *dw = container_of(work, struct delayed_work, work); + struct delayed_work *dw = to_delayed_work(work); struct zfcp_wka_port *wka_port = container_of(dw, struct zfcp_wka_port, work); diff --git a/drivers/staging/rtl8187se/ieee80211/ieee80211_softmac.c b/drivers/staging/rtl8187se/ieee80211/ieee80211_softmac.c index e5752f615e09..80f9cc7137c2 100644 --- a/drivers/staging/rtl8187se/ieee80211/ieee80211_softmac.c +++ b/drivers/staging/rtl8187se/ieee80211/ieee80211_softmac.c @@ -719,7 +719,7 @@ void ieee80211_softmac_scan(struct ieee80211_device *ieee) #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)) void ieee80211_softmac_scan_wq(struct work_struct *work) { - struct delayed_work *dwork = container_of(work, struct delayed_work, work); + struct delayed_work *dwork = to_delayed_work(work); struct ieee80211_device *ieee = container_of(dwork, struct ieee80211_device, softmac_scan_wq); #else void ieee80211_softmac_scan_wq(struct ieee80211_device *ieee) @@ -777,7 +777,7 @@ out: #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)) void ieee80211_softmac_scan_wq(struct work_struct *work) { - struct delayed_work *dwork = container_of(work, struct delayed_work, work); + struct delayed_work *dwork = to_delayed_work(work); struct ieee80211_device *ieee = container_of(work, struct ieee80211_device, softmac_scan_wq); #else void ieee80211_softmac_scan_wq(struct ieee80211_device *ieee) @@ -2980,7 +2980,7 @@ void ieee80211_start_monitor_mode(struct ieee80211_device *ieee) #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)) void ieee80211_start_ibss_wq(struct work_struct *work) { - struct delayed_work *dwork = container_of(work, struct delayed_work, work); + struct delayed_work *dwork = 
to_delayed_work(work); struct ieee80211_device *ieee = container_of(dwork, struct ieee80211_device, start_ibss_wq); #else void ieee80211_start_ibss_wq(struct ieee80211_device *ieee) @@ -3162,7 +3162,7 @@ void ieee80211_disassociate(struct ieee80211_device *ieee) #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)) void ieee80211_associate_retry_wq(struct work_struct *work) { - struct delayed_work *dwork = container_of(work, struct delayed_work, work); + struct delayed_work *dwork = to_delayed_work(work); struct ieee80211_device *ieee = container_of(dwork, struct ieee80211_device, associate_retry_wq); #else void ieee80211_associate_retry_wq(struct ieee80211_device *ieee) diff --git a/drivers/staging/rtl8187se/r8180_core.c b/drivers/staging/rtl8187se/r8180_core.c index 66de5cc8ddf1..ff1f23f99f27 100644 --- a/drivers/staging/rtl8187se/r8180_core.c +++ b/drivers/staging/rtl8187se/r8180_core.c @@ -5438,7 +5438,7 @@ void rtl8180_hw_wakeup_wq (struct work_struct *work) // struct r8180_priv *priv = container_of(work, struct r8180_priv, watch_dog_wq); // struct ieee80211_device * ieee = (struct ieee80211_device*) // container_of(work, struct ieee80211_device, watch_dog_wq); - struct delayed_work *dwork = container_of(work,struct delayed_work,work); + struct delayed_work *dwork = to_delayed_work(work); struct ieee80211_device *ieee = container_of(dwork,struct ieee80211_device,hw_wakeup_wq); struct net_device *dev = ieee->dev; #else @@ -5459,7 +5459,7 @@ void rtl8180_hw_sleep_wq (struct work_struct *work) // struct r8180_priv *priv = container_of(work, struct r8180_priv, watch_dog_wq); // struct ieee80211_device * ieee = (struct ieee80211_device*) // container_of(work, struct ieee80211_device, watch_dog_wq); - struct delayed_work *dwork = container_of(work,struct delayed_work,work); + struct delayed_work *dwork = to_delayed_work(work); struct ieee80211_device *ieee = container_of(dwork,struct ieee80211_device,hw_sleep_wq); struct net_device *dev = ieee->dev; #else @@ -6407,7 +6407,7 @@ priv->txnpring)/8); void rtl8180_tx_irq_wq(struct work_struct *work) { //struct r8180_priv *priv = container_of(work, struct r8180_priv, reset_wq); - struct delayed_work *dwork = container_of(work,struct delayed_work,work); + struct delayed_work *dwork = to_delayed_work(work); struct ieee80211_device * ieee = (struct ieee80211_device*) container_of(dwork, struct ieee80211_device, watch_dog_wq); struct net_device *dev = ieee->dev; @@ -6691,7 +6691,7 @@ lizhaoming--------------------------- RF power on/power off ----------------- #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)) void GPIOChangeRFWorkItemCallBack(struct work_struct *work) { - //struct delayed_work *dwork = container_of(work, struct delayed_work, work); + //struct delayed_work *dwork = to_delayed_work(work); struct ieee80211_device *ieee = container_of(work, struct ieee80211_device, GPIOChangeRFWorkItem.work); struct net_device *dev = ieee->dev; struct r8180_priv *priv = ieee80211_priv(dev); diff --git a/drivers/usb/wusbcore/devconnect.c b/drivers/usb/wusbcore/devconnect.c index f0aac0cf315a..386eaa22d215 100644 --- a/drivers/usb/wusbcore/devconnect.c +++ b/drivers/usb/wusbcore/devconnect.c @@ -471,7 +471,7 @@ static void __wusbhc_keep_alive(struct wusbhc *wusbhc) */ static void wusbhc_keep_alive_run(struct work_struct *ws) { - struct delayed_work *dw = container_of(ws, struct delayed_work, work); + struct delayed_work *dw = to_delayed_work(ws); struct wusbhc *wusbhc = container_of(dw, struct wusbhc, keep_alive_timer); mutex_lock(&wusbhc->mutex); diff --git 
a/include/linux/workqueue.h b/include/linux/workqueue.h index 3cd51e579ab1..13e1adf55c4c 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -41,6 +41,11 @@ struct delayed_work { struct timer_list timer; }; +static inline struct delayed_work *to_delayed_work(struct work_struct *work) +{ + return container_of(work, struct delayed_work, work); +} + struct execute_work { struct work_struct work; }; diff --git a/mm/slab.c b/mm/slab.c index 825c606f691d..208323fd37bc 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3992,8 +3992,7 @@ static void cache_reap(struct work_struct *w) struct kmem_cache *searchp; struct kmem_list3 *l3; int node = numa_node_id(); - struct delayed_work *work = - container_of(w, struct delayed_work, work); + struct delayed_work *work = to_delayed_work(w); if (!mutex_trylock(&cache_chain_mutex)) /* Give up. Setup the next iteration. */ -- cgit v1.2.3 From 4e49627b9bc29a14b393c480e8c979e3bc922ef7 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sat, 5 Sep 2009 11:17:06 -0700 Subject: workqueues: introduce __cancel_delayed_work() cancel_delayed_work() has to use del_timer_sync() to guarantee the timer function is not running after return. But most users doesn't actually need this, and del_timer_sync() has problems: it is not useable from interrupt, and it depends on every lock which could be taken from irq. Introduce __cancel_delayed_work() which calls del_timer() instead. The immediate reason for this patch is http://bugzilla.kernel.org/show_bug.cgi?id=13757 but hopefully this helper makes sense anyway. As for 13757 bug, actually we need requeue_delayed_work(), but its semantics are not yet clear. Merge this patch early to resolves cross-tree interdependencies between input and infiniband. Signed-off-by: Oleg Nesterov Cc: Dmitry Torokhov Cc: Roland Dreier Cc: Stefan Richter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/workqueue.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 13e1adf55c4c..6273fa97b527 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -240,6 +240,21 @@ static inline int cancel_delayed_work(struct delayed_work *work) return ret; } +/* + * Like above, but uses del_timer() instead of del_timer_sync(). This means, + * if it returns 0 the timer function may be running and the queueing is in + * progress. + */ +static inline int __cancel_delayed_work(struct delayed_work *work) +{ + int ret; + + ret = del_timer(&work->timer); + if (ret) + work_clear_pending(&work->work); + return ret; +} + extern int cancel_delayed_work_sync(struct delayed_work *work); /* Obsolete. use cancel_delayed_work_sync() */ -- cgit v1.2.3 From b9049df5a0e7f35456c06b949b08b898b9c2e7bc Mon Sep 17 00:00:00 2001 From: Dmitri Vorobiev Date: Tue, 23 Jun 2009 12:09:29 +0200 Subject: Change "useing" -> "using". Signed-off-by: Dmitri Vorobiev Signed-off-by: Jiri Kosina --- include/linux/workqueue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 6273fa97b527..7ef0c7b94f31 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -94,7 +94,7 @@ struct execute_work { /* * initialize all of a work item in one go * - * NOTE! No point in using "atomic_long_set()": useing a direct + * NOTE! 
No point in using "atomic_long_set()": using a direct * assignment of the work data initializer allows the compiler * to generate better code. */ -- cgit v1.2.3 From 43046b606673c9c991919ff75b980b72541e9ede Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 14 Oct 2009 09:16:42 -0700 Subject: workqueue: add 'flush_delayed_work()' to run and wait for delayed work It basically turns a delayed work into an immediate work, and then waits for it to finish. --- include/linux/workqueue.h | 1 + kernel/workqueue.c | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 7ef0c7b94f31..cf24c20de9e4 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -207,6 +207,7 @@ extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, extern void flush_workqueue(struct workqueue_struct *wq); extern void flush_scheduled_work(void); +extern void flush_delayed_work(struct delayed_work *work); extern int schedule_work(struct work_struct *work); extern int schedule_work_on(int cpu, struct work_struct *work); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index addfe2df93b1..ccefe574dcf7 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -639,6 +639,24 @@ int schedule_delayed_work(struct delayed_work *dwork, } EXPORT_SYMBOL(schedule_delayed_work); +/** + * flush_delayed_work - block until a dwork_struct's callback has terminated + * @dwork: the delayed work which is to be flushed + * + * Any timeout is cancelled, and any pending work is run immediately. + */ +void flush_delayed_work(struct delayed_work *dwork) +{ + if (del_timer(&dwork->timer)) { + struct cpu_workqueue_struct *cwq; + cwq = wq_per_cpu(keventd_wq, get_cpu()); + __queue_work(cwq, &dwork->work); + put_cpu(); + } + flush_work(&dwork->work); +} +EXPORT_SYMBOL(flush_delayed_work); + /** * schedule_delayed_work_on - queue work in global workqueue on CPU after delay * @cpu: cpu to use -- cgit v1.2.3 From dc186ad741c12ae9ecac8b89e317ef706fdaf8f6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 16 Nov 2009 01:09:48 +0900 Subject: workqueue: Add debugobjects support Add debugobject support to track the life time of work_structs. While at it, remove duplicate definition of INIT_DELAYED_WORK_ON_STACK(). 
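
With this in place the lifetime rules for on-stack work items become explicit; a minimal sketch of the expected usage (the callback and function names are illustrative):

	#include <linux/workqueue.h>

	static void my_work_fn(struct work_struct *work)	/* illustrative */
	{
		/* ... */
	}

	static void stack_work_example(void)
	{
		struct work_struct w;

		INIT_WORK_ON_STACK(&w, my_work_fn);	/* registers the object as on-stack */
		schedule_work(&w);
		flush_work(&w);				/* must finish before the frame is gone */
		destroy_work_on_stack(&w);		/* and be released from tracking */
	}
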
Signed-off-by: Thomas Gleixner Signed-off-by: Tejun Heo --- arch/x86/kernel/smpboot.c | 4 +- include/linux/workqueue.h | 38 ++++++++++---- kernel/workqueue.c | 131 ++++++++++++++++++++++++++++++++++++++++++++-- lib/Kconfig.debug | 8 +++ 4 files changed, 166 insertions(+), 15 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 565ebc65920e..ba43dfed353d 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -687,7 +687,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), }; - INIT_WORK(&c_idle.work, do_fork_idle); + INIT_WORK_ON_STACK(&c_idle.work, do_fork_idle); alternatives_smp_switch(1); @@ -713,6 +713,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) if (IS_ERR(c_idle.idle)) { printk("failed fork for CPU %d\n", cpu); + destroy_work_on_stack(&c_idle.work); return PTR_ERR(c_idle.idle); } @@ -831,6 +832,7 @@ do_rest: smpboot_restore_warm_reset_vector(); } + destroy_work_on_stack(&c_idle.work); return boot_error; } diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index cf24c20de9e4..9466e860d8c2 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -25,6 +25,7 @@ typedef void (*work_func_t)(struct work_struct *work); struct work_struct { atomic_long_t data; #define WORK_STRUCT_PENDING 0 /* T if work item pending execution */ +#define WORK_STRUCT_STATIC 1 /* static initializer (debugobjects) */ #define WORK_STRUCT_FLAG_MASK (3UL) #define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK) struct list_head entry; @@ -35,6 +36,7 @@ struct work_struct { }; #define WORK_DATA_INIT() ATOMIC_LONG_INIT(0) +#define WORK_DATA_STATIC_INIT() ATOMIC_LONG_INIT(2) struct delayed_work { struct work_struct work; @@ -63,7 +65,7 @@ struct execute_work { #endif #define __WORK_INITIALIZER(n, f) { \ - .data = WORK_DATA_INIT(), \ + .data = WORK_DATA_STATIC_INIT(), \ .entry = { &(n).entry, &(n).entry }, \ .func = (f), \ __WORK_INIT_LOCKDEP_MAP(#n, &(n)) \ @@ -91,6 +93,14 @@ struct execute_work { #define PREPARE_DELAYED_WORK(_work, _func) \ PREPARE_WORK(&(_work)->work, (_func)) +#ifdef CONFIG_DEBUG_OBJECTS_WORK +extern void __init_work(struct work_struct *work, int onstack); +extern void destroy_work_on_stack(struct work_struct *work); +#else +static inline void __init_work(struct work_struct *work, int onstack) { } +static inline void destroy_work_on_stack(struct work_struct *work) { } +#endif + /* * initialize all of a work item in one go * @@ -99,24 +109,36 @@ struct execute_work { * to generate better code. 
*/ #ifdef CONFIG_LOCKDEP -#define INIT_WORK(_work, _func) \ +#define __INIT_WORK(_work, _func, _onstack) \ do { \ static struct lock_class_key __key; \ \ + __init_work((_work), _onstack); \ (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \ lockdep_init_map(&(_work)->lockdep_map, #_work, &__key, 0);\ INIT_LIST_HEAD(&(_work)->entry); \ PREPARE_WORK((_work), (_func)); \ } while (0) #else -#define INIT_WORK(_work, _func) \ +#define __INIT_WORK(_work, _func, _onstack) \ do { \ + __init_work((_work), _onstack); \ (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \ INIT_LIST_HEAD(&(_work)->entry); \ PREPARE_WORK((_work), (_func)); \ } while (0) #endif +#define INIT_WORK(_work, _func) \ + do { \ + __INIT_WORK((_work), (_func), 0); \ + } while (0) + +#define INIT_WORK_ON_STACK(_work, _func) \ + do { \ + __INIT_WORK((_work), (_func), 1); \ + } while (0) + #define INIT_DELAYED_WORK(_work, _func) \ do { \ INIT_WORK(&(_work)->work, (_func)); \ @@ -125,22 +147,16 @@ struct execute_work { #define INIT_DELAYED_WORK_ON_STACK(_work, _func) \ do { \ - INIT_WORK(&(_work)->work, (_func)); \ + INIT_WORK_ON_STACK(&(_work)->work, (_func)); \ init_timer_on_stack(&(_work)->timer); \ } while (0) -#define INIT_DELAYED_WORK_DEFERRABLE(_work, _func) \ +#define INIT_DELAYED_WORK_DEFERRABLE(_work, _func) \ do { \ INIT_WORK(&(_work)->work, (_func)); \ init_timer_deferrable(&(_work)->timer); \ } while (0) -#define INIT_DELAYED_WORK_ON_STACK(_work, _func) \ - do { \ - INIT_WORK(&(_work)->work, (_func)); \ - init_timer_on_stack(&(_work)->timer); \ - } while (0) - /** * work_pending - Find out whether a work item is currently pending * @work: The work item in question diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 12328147132c..ddad63fbb61b 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -68,6 +68,116 @@ struct workqueue_struct { #endif }; +#ifdef CONFIG_DEBUG_OBJECTS_WORK + +static struct debug_obj_descr work_debug_descr; + +/* + * fixup_init is called when: + * - an active object is initialized + */ +static int work_fixup_init(void *addr, enum debug_obj_state state) +{ + struct work_struct *work = addr; + + switch (state) { + case ODEBUG_STATE_ACTIVE: + cancel_work_sync(work); + debug_object_init(work, &work_debug_descr); + return 1; + default: + return 0; + } +} + +/* + * fixup_activate is called when: + * - an active object is activated + * - an unknown object is activated (might be a statically initialized object) + */ +static int work_fixup_activate(void *addr, enum debug_obj_state state) +{ + struct work_struct *work = addr; + + switch (state) { + + case ODEBUG_STATE_NOTAVAILABLE: + /* + * This is not really a fixup. The work struct was + * statically initialized. We just make sure that it + * is tracked in the object tracker. 
+ */ + if (test_bit(WORK_STRUCT_STATIC, work_data_bits(work))) { + debug_object_init(work, &work_debug_descr); + debug_object_activate(work, &work_debug_descr); + return 0; + } + WARN_ON_ONCE(1); + return 0; + + case ODEBUG_STATE_ACTIVE: + WARN_ON(1); + + default: + return 0; + } +} + +/* + * fixup_free is called when: + * - an active object is freed + */ +static int work_fixup_free(void *addr, enum debug_obj_state state) +{ + struct work_struct *work = addr; + + switch (state) { + case ODEBUG_STATE_ACTIVE: + cancel_work_sync(work); + debug_object_free(work, &work_debug_descr); + return 1; + default: + return 0; + } +} + +static struct debug_obj_descr work_debug_descr = { + .name = "work_struct", + .fixup_init = work_fixup_init, + .fixup_activate = work_fixup_activate, + .fixup_free = work_fixup_free, +}; + +static inline void debug_work_activate(struct work_struct *work) +{ + debug_object_activate(work, &work_debug_descr); +} + +static inline void debug_work_deactivate(struct work_struct *work) +{ + debug_object_deactivate(work, &work_debug_descr); +} + +void __init_work(struct work_struct *work, int onstack) +{ + if (onstack) + debug_object_init_on_stack(work, &work_debug_descr); + else + debug_object_init(work, &work_debug_descr); +} +EXPORT_SYMBOL_GPL(__init_work); + +void destroy_work_on_stack(struct work_struct *work) +{ + debug_object_free(work, &work_debug_descr); +} +EXPORT_SYMBOL_GPL(destroy_work_on_stack); + +#else +static inline void debug_work_activate(struct work_struct *work) { } +static inline void debug_work_deactivate(struct work_struct *work) { } +#endif + /* Serializes the accesses to the list of workqueues. */ static DEFINE_SPINLOCK(workqueue_lock); static LIST_HEAD(workqueues); @@ -145,6 +255,7 @@ static void __queue_work(struct cpu_workqueue_struct *cwq, { unsigned long flags; + debug_work_activate(work); spin_lock_irqsave(&cwq->lock, flags); insert_work(cwq, work, &cwq->worklist); spin_unlock_irqrestore(&cwq->lock, flags); @@ -280,6 +391,7 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq) struct lockdep_map lockdep_map = work->lockdep_map; #endif trace_workqueue_execution(cwq->thread, work); + debug_work_deactivate(work); cwq->current_work = work; list_del_init(cwq->worklist.next); spin_unlock_irq(&cwq->lock); @@ -350,11 +462,18 @@ static void wq_barrier_func(struct work_struct *work) static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, struct wq_barrier *barr, struct list_head *head) { - INIT_WORK(&barr->work, wq_barrier_func); + /* + * debugobject calls are safe here even with cwq->lock locked + * as we know for sure that this will not trigger any of the + * checks and call back into the fixup functions where we + * might deadlock. 
+ */ + INIT_WORK_ON_STACK(&barr->work, wq_barrier_func); __set_bit(WORK_STRUCT_PENDING, work_data_bits(&barr->work)); init_completion(&barr->done); + debug_work_activate(&barr->work); insert_work(cwq, &barr->work, head); } @@ -372,8 +491,10 @@ static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq) } spin_unlock_irq(&cwq->lock); - if (active) + if (active) { wait_for_completion(&barr.done); + destroy_work_on_stack(&barr.work); + } return active; } @@ -451,6 +572,7 @@ out: return 0; wait_for_completion(&barr.done); + destroy_work_on_stack(&barr.work); return 1; } EXPORT_SYMBOL_GPL(flush_work); @@ -485,6 +607,7 @@ static int try_to_grab_pending(struct work_struct *work) */ smp_rmb(); if (cwq == get_wq_data(work)) { + debug_work_deactivate(work); list_del_init(&work->entry); ret = 1; } @@ -507,8 +630,10 @@ static void wait_on_cpu_work(struct cpu_workqueue_struct *cwq, } spin_unlock_irq(&cwq->lock); - if (unlikely(running)) + if (unlikely(running)) { wait_for_completion(&barr.done); + destroy_work_on_stack(&barr.work); + } } static void wait_on_work(struct work_struct *work) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 234ceb10861f..c91f0519d493 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -298,6 +298,14 @@ config DEBUG_OBJECTS_TIMERS timer routines to track the life time of timer objects and validate the timer operations. +config DEBUG_OBJECTS_WORK + bool "Debug work objects" + depends on DEBUG_OBJECTS + help + If you say Y here, additional code will be inserted into the + work queue routines to track the life time of work objects and + validate the work operations. + config DEBUG_OBJECTS_ENABLE_DEFAULT int "debug_objects bootup default value (0-1)" range 0 1 -- cgit v1.2.3 From a25909a4d4a29e272f953e12595bf2f04a292dbd Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 13 May 2010 12:32:28 -0700 Subject: lockdep: Add an in_workqueue_context() lockdep-based test function Some recent uses of RCU make use of workqueues. In these uses, execution within the context of a specific workqueue takes the place of the usual RCU read-side primitives such as rcu_read_lock(), and flushing of workqueues takes the place of the usual RCU grace-period primitives. Checking for correct use of rcu_dereference() in such cases requires a test of whether the code is executing in the context of a particular workqueue. This commit adds an in_workqueue_context() function that provides this test. This new function is only defined when lockdep is enabled, which allows it to be used as the second argument of rcu_dereference_check(). Signed-off-by: Paul E. McKenney --- include/linux/workqueue.h | 4 ++++ kernel/workqueue.c | 15 +++++++++++++++ 2 files changed, 19 insertions(+) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 9466e860d8c2..d0f7c8178498 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -297,4 +297,8 @@ static inline long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) #else long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg); #endif /* CONFIG_SMP */ + +#ifdef CONFIG_LOCKDEP +int in_workqueue_context(struct workqueue_struct *wq); +#endif #endif diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 327d2deb4451..59fef1531dd2 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -68,6 +68,21 @@ struct workqueue_struct { #endif }; +#ifdef CONFIG_LOCKDEP +/** + * in_workqueue_context() - in context of specified workqueue? 
+ * @wq: the workqueue of interest + * + * Checks lockdep state to see if the current task is executing from + * within a workqueue item. This function exists only if lockdep is + * enabled. + */ +int in_workqueue_context(struct workqueue_struct *wq) +{ + return lock_is_held(&wq->lockdep_map); +} +#endif + #ifdef CONFIG_DEBUG_OBJECTS_WORK static struct debug_obj_descr work_debug_descr; -- cgit v1.2.3 From c790bce0481857412c964c5e9d46d56e41c4b051 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 29 Jun 2010 10:07:09 +0200 Subject: workqueue: kill RT workqueue With stop_machine() converted to use cpu_stop, RT workqueue doesn't have any user left. Kill RT workqueue support. Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 20 +++++++++----------- kernel/workqueue.c | 6 ------ 2 files changed, 9 insertions(+), 17 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 9466e860d8c2..0697946c66a1 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -181,12 +181,11 @@ static inline void destroy_work_on_stack(struct work_struct *work) { } extern struct workqueue_struct * -__create_workqueue_key(const char *name, int singlethread, - int freezeable, int rt, struct lock_class_key *key, - const char *lock_name); +__create_workqueue_key(const char *name, int singlethread, int freezeable, + struct lock_class_key *key, const char *lock_name); #ifdef CONFIG_LOCKDEP -#define __create_workqueue(name, singlethread, freezeable, rt) \ +#define __create_workqueue(name, singlethread, freezeable) \ ({ \ static struct lock_class_key __key; \ const char *__lock_name; \ @@ -197,19 +196,18 @@ __create_workqueue_key(const char *name, int singlethread, __lock_name = #name; \ \ __create_workqueue_key((name), (singlethread), \ - (freezeable), (rt), &__key, \ + (freezeable), &__key, \ __lock_name); \ }) #else -#define __create_workqueue(name, singlethread, freezeable, rt) \ - __create_workqueue_key((name), (singlethread), (freezeable), (rt), \ +#define __create_workqueue(name, singlethread, freezeable) \ + __create_workqueue_key((name), (singlethread), (freezeable), \ NULL, NULL) #endif -#define create_workqueue(name) __create_workqueue((name), 0, 0, 0) -#define create_rt_workqueue(name) __create_workqueue((name), 0, 0, 1) -#define create_freezeable_workqueue(name) __create_workqueue((name), 1, 1, 0) -#define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0, 0) +#define create_workqueue(name) __create_workqueue((name), 0, 0) +#define create_freezeable_workqueue(name) __create_workqueue((name), 1, 1) +#define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0) extern void destroy_workqueue(struct workqueue_struct *wq); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 327d2deb4451..1a47fbf92fae 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -62,7 +62,6 @@ struct workqueue_struct { const char *name; int singlethread; int freezeable; /* Freeze threads during suspend */ - int rt; #ifdef CONFIG_LOCKDEP struct lockdep_map lockdep_map; #endif @@ -947,7 +946,6 @@ init_cpu_workqueue(struct workqueue_struct *wq, int cpu) static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) { - struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; struct workqueue_struct *wq = cwq->wq; const char *fmt = is_wq_single_threaded(wq) ? 
"%s" : "%s/%d"; struct task_struct *p; @@ -963,8 +961,6 @@ static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) */ if (IS_ERR(p)) return PTR_ERR(p); - if (cwq->wq->rt) - sched_setscheduler_nocheck(p, SCHED_FIFO, ¶m); cwq->thread = p; trace_workqueue_creation(cwq->thread, cpu); @@ -986,7 +982,6 @@ static void start_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) struct workqueue_struct *__create_workqueue_key(const char *name, int singlethread, int freezeable, - int rt, struct lock_class_key *key, const char *lock_name) { @@ -1008,7 +1003,6 @@ struct workqueue_struct *__create_workqueue_key(const char *name, lockdep_init_map(&wq->lockdep_map, lock_name, key, 0); wq->singlethread = singlethread; wq->freezeable = freezeable; - wq->rt = rt; INIT_LIST_HEAD(&wq->list); if (singlethread) { -- cgit v1.2.3 From 4690c4ab56c71919893ca25252f2dd65b58188c7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 29 Jun 2010 10:07:10 +0200 Subject: workqueue: misc/cosmetic updates Make the following updates in preparation of concurrency managed workqueue. None of these changes causes any visible behavior difference. * Add comments and adjust indentations to data structures and several functions. * Rename wq_per_cpu() to get_cwq() and swap the position of two parameters for consistency. Convert a direct per_cpu_ptr() access to wq->cpu_wq to get_cwq(). * Add work_static() and Update set_wq_data() such that it sets the flags part to WORK_STRUCT_PENDING | WORK_STRUCT_STATIC if static | @extra_flags. * Move santiy check on work->entry emptiness from queue_work_on() to __queue_work() which all queueing paths share. * Make __queue_work() take @cpu and @wq instead of @cwq. * Restructure flush_work() and __create_workqueue_key() to make them easier to modify. Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 5 ++ kernel/workqueue.c | 131 +++++++++++++++++++++++++++++----------------- 2 files changed, 89 insertions(+), 47 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 0697946c66a1..e724dafc9e6d 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -96,9 +96,14 @@ struct execute_work { #ifdef CONFIG_DEBUG_OBJECTS_WORK extern void __init_work(struct work_struct *work, int onstack); extern void destroy_work_on_stack(struct work_struct *work); +static inline unsigned int work_static(struct work_struct *work) +{ + return *work_data_bits(work) & (1 << WORK_STRUCT_STATIC); +} #else static inline void __init_work(struct work_struct *work, int onstack) { } static inline void destroy_work_on_stack(struct work_struct *work) { } +static inline unsigned int work_static(struct work_struct *work) { return 0; } #endif /* diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 1a47fbf92fae..c56146a755e5 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -36,6 +36,16 @@ #define CREATE_TRACE_POINTS #include +/* + * Structure fields follow one of the following exclusion rules. + * + * I: Set during initialization and read-only afterwards. + * + * L: cwq->lock protected. Access with cwq->lock held. + * + * W: workqueue_lock protected. + */ + /* * The per-CPU workqueue (if single thread, we always use the first * possible cpu). 
@@ -48,8 +58,8 @@ struct cpu_workqueue_struct { wait_queue_head_t more_work; struct work_struct *current_work; - struct workqueue_struct *wq; - struct task_struct *thread; + struct workqueue_struct *wq; /* I: the owning workqueue */ + struct task_struct *thread; } ____cacheline_aligned; /* @@ -57,13 +67,13 @@ struct cpu_workqueue_struct { * per-CPU workqueues: */ struct workqueue_struct { - struct cpu_workqueue_struct *cpu_wq; - struct list_head list; - const char *name; + struct cpu_workqueue_struct *cpu_wq; /* I: cwq's */ + struct list_head list; /* W: list of all workqueues */ + const char *name; /* I: workqueue name */ int singlethread; int freezeable; /* Freeze threads during suspend */ #ifdef CONFIG_LOCKDEP - struct lockdep_map lockdep_map; + struct lockdep_map lockdep_map; #endif }; @@ -204,8 +214,8 @@ static const struct cpumask *wq_cpu_map(struct workqueue_struct *wq) ? cpu_singlethread_map : cpu_populated_map; } -static -struct cpu_workqueue_struct *wq_per_cpu(struct workqueue_struct *wq, int cpu) +static struct cpu_workqueue_struct *get_cwq(unsigned int cpu, + struct workqueue_struct *wq) { if (unlikely(is_wq_single_threaded(wq))) cpu = singlethread_cpu; @@ -217,15 +227,13 @@ struct cpu_workqueue_struct *wq_per_cpu(struct workqueue_struct *wq, int cpu) * - Must *only* be called if the pending flag is set */ static inline void set_wq_data(struct work_struct *work, - struct cpu_workqueue_struct *cwq) + struct cpu_workqueue_struct *cwq, + unsigned long extra_flags) { - unsigned long new; - BUG_ON(!work_pending(work)); - new = (unsigned long) cwq | (1UL << WORK_STRUCT_PENDING); - new |= WORK_STRUCT_FLAG_MASK & *work_data_bits(work); - atomic_long_set(&work->data, new); + atomic_long_set(&work->data, (unsigned long)cwq | work_static(work) | + (1UL << WORK_STRUCT_PENDING) | extra_flags); } /* @@ -233,9 +241,7 @@ static inline void set_wq_data(struct work_struct *work, */ static inline void clear_wq_data(struct work_struct *work) { - unsigned long flags = *work_data_bits(work) & - (1UL << WORK_STRUCT_STATIC); - atomic_long_set(&work->data, flags); + atomic_long_set(&work->data, work_static(work)); } static inline @@ -244,29 +250,47 @@ struct cpu_workqueue_struct *get_wq_data(struct work_struct *work) return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK); } +/** + * insert_work - insert a work into cwq + * @cwq: cwq @work belongs to + * @work: work to insert + * @head: insertion point + * @extra_flags: extra WORK_STRUCT_* flags to set + * + * Insert @work into @cwq after @head. + * + * CONTEXT: + * spin_lock_irq(cwq->lock). + */ static void insert_work(struct cpu_workqueue_struct *cwq, - struct work_struct *work, struct list_head *head) + struct work_struct *work, struct list_head *head, + unsigned int extra_flags) { trace_workqueue_insertion(cwq->thread, work); - set_wq_data(work, cwq); + /* we own @work, set data and link */ + set_wq_data(work, cwq, extra_flags); + /* * Ensure that we get the right work->data if we see the * result of list_add() below, see try_to_grab_pending(). 
*/ smp_wmb(); + list_add_tail(&work->entry, head); wake_up(&cwq->more_work); } -static void __queue_work(struct cpu_workqueue_struct *cwq, +static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, struct work_struct *work) { + struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); unsigned long flags; debug_work_activate(work); spin_lock_irqsave(&cwq->lock, flags); - insert_work(cwq, work, &cwq->worklist); + BUG_ON(!list_empty(&work->entry)); + insert_work(cwq, work, &cwq->worklist, 0); spin_unlock_irqrestore(&cwq->lock, flags); } @@ -308,8 +332,7 @@ queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work) int ret = 0; if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) { - BUG_ON(!list_empty(&work->entry)); - __queue_work(wq_per_cpu(wq, cpu), work); + __queue_work(cpu, wq, work); ret = 1; } return ret; @@ -320,9 +343,8 @@ static void delayed_work_timer_fn(unsigned long __data) { struct delayed_work *dwork = (struct delayed_work *)__data; struct cpu_workqueue_struct *cwq = get_wq_data(&dwork->work); - struct workqueue_struct *wq = cwq->wq; - __queue_work(wq_per_cpu(wq, smp_processor_id()), &dwork->work); + __queue_work(smp_processor_id(), cwq->wq, &dwork->work); } /** @@ -366,7 +388,7 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, timer_stats_timer_set_start_info(&dwork->timer); /* This stores cwq for the moment, for the timer_fn */ - set_wq_data(work, wq_per_cpu(wq, raw_smp_processor_id())); + set_wq_data(work, get_cwq(raw_smp_processor_id(), wq), 0); timer->expires = jiffies + delay; timer->data = (unsigned long)dwork; timer->function = delayed_work_timer_fn; @@ -430,6 +452,12 @@ static void run_workqueue(struct cpu_workqueue_struct *cwq) spin_unlock_irq(&cwq->lock); } +/** + * worker_thread - the worker thread function + * @__cwq: cwq to serve + * + * The cwq worker thread function. + */ static int worker_thread(void *__cwq) { struct cpu_workqueue_struct *cwq = __cwq; @@ -468,6 +496,17 @@ static void wq_barrier_func(struct work_struct *work) complete(&barr->done); } +/** + * insert_wq_barrier - insert a barrier work + * @cwq: cwq to insert barrier into + * @barr: wq_barrier to insert + * @head: insertion point + * + * Insert barrier @barr into @cwq before @head. + * + * CONTEXT: + * spin_lock_irq(cwq->lock). + */ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, struct wq_barrier *barr, struct list_head *head) { @@ -479,11 +518,10 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, */ INIT_WORK_ON_STACK(&barr->work, wq_barrier_func); __set_bit(WORK_STRUCT_PENDING, work_data_bits(&barr->work)); - init_completion(&barr->done); debug_work_activate(&barr->work); - insert_work(cwq, &barr->work, head); + insert_work(cwq, &barr->work, head, 0); } static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq) @@ -517,9 +555,6 @@ static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq) * * We sleep until all works which were queued on entry have been handled, * but we are not livelocked by new incoming ones. - * - * This function used to run the workqueues itself. Now we just wait for the - * helper threads to do it. 
*/ void flush_workqueue(struct workqueue_struct *wq) { @@ -558,7 +593,6 @@ int flush_work(struct work_struct *work) lock_map_acquire(&cwq->wq->lockdep_map); lock_map_release(&cwq->wq->lockdep_map); - prev = NULL; spin_lock_irq(&cwq->lock); if (!list_empty(&work->entry)) { /* @@ -567,22 +601,22 @@ int flush_work(struct work_struct *work) */ smp_rmb(); if (unlikely(cwq != get_wq_data(work))) - goto out; + goto already_gone; prev = &work->entry; } else { if (cwq->current_work != work) - goto out; + goto already_gone; prev = &cwq->worklist; } insert_wq_barrier(cwq, &barr, prev->next); -out: - spin_unlock_irq(&cwq->lock); - if (!prev) - return 0; + spin_unlock_irq(&cwq->lock); wait_for_completion(&barr.done); destroy_work_on_stack(&barr.work); return 1; +already_gone: + spin_unlock_irq(&cwq->lock); + return 0; } EXPORT_SYMBOL_GPL(flush_work); @@ -665,7 +699,7 @@ static void wait_on_work(struct work_struct *work) cpu_map = wq_cpu_map(wq); for_each_cpu(cpu, cpu_map) - wait_on_cpu_work(per_cpu_ptr(wq->cpu_wq, cpu), work); + wait_on_cpu_work(get_cwq(cpu, wq), work); } static int __cancel_work_timer(struct work_struct *work, @@ -782,9 +816,8 @@ EXPORT_SYMBOL(schedule_delayed_work); void flush_delayed_work(struct delayed_work *dwork) { if (del_timer_sync(&dwork->timer)) { - struct cpu_workqueue_struct *cwq; - cwq = wq_per_cpu(get_wq_data(&dwork->work)->wq, get_cpu()); - __queue_work(cwq, &dwork->work); + __queue_work(get_cpu(), get_wq_data(&dwork->work)->wq, + &dwork->work); put_cpu(); } flush_work(&dwork->work); @@ -991,13 +1024,11 @@ struct workqueue_struct *__create_workqueue_key(const char *name, wq = kzalloc(sizeof(*wq), GFP_KERNEL); if (!wq) - return NULL; + goto err; wq->cpu_wq = alloc_percpu(struct cpu_workqueue_struct); - if (!wq->cpu_wq) { - kfree(wq); - return NULL; - } + if (!wq->cpu_wq) + goto err; wq->name = name; lockdep_init_map(&wq->lockdep_map, lock_name, key, 0); @@ -1041,6 +1072,12 @@ struct workqueue_struct *__create_workqueue_key(const char *name, wq = NULL; } return wq; +err: + if (wq) { + free_percpu(wq->cpu_wq); + kfree(wq); + } + return NULL; } EXPORT_SYMBOL_GPL(__create_workqueue_key); -- cgit v1.2.3 From 97e37d7b9e65a6ac939f796f91081135b7a08acc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 29 Jun 2010 10:07:10 +0200 Subject: workqueue: merge feature parameters into flags Currently, __create_workqueue_key() takes @singlethread and @freezeable paramters and store them separately in workqueue_struct. Merge them into a single flags parameter and field and use WQ_FREEZEABLE and WQ_SINGLE_THREAD. 
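
The convenience wrappers keep their single-argument form, so a typical caller is unaffected; a minimal caller-side sketch ("mydrv" is a made-up name):

	#include <linux/workqueue.h>
	#include <linux/errno.h>

	static struct workqueue_struct *mydrv_wq;	/* illustrative */

	static int mydrv_init(void)
	{
		/* expands to __create_workqueue("mydrv", WQ_FREEZEABLE | WQ_SINGLE_THREAD) */
		mydrv_wq = create_freezeable_workqueue("mydrv");
		if (!mydrv_wq)
			return -ENOMEM;
		return 0;
	}

	static void mydrv_exit(void)
	{
		destroy_workqueue(mydrv_wq);
	}
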
Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 25 +++++++++++++++---------- kernel/workqueue.c | 17 +++++++---------- 2 files changed, 22 insertions(+), 20 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index e724dafc9e6d..d89cfc143b1a 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -184,13 +184,17 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } #define work_clear_pending(work) \ clear_bit(WORK_STRUCT_PENDING, work_data_bits(work)) +enum { + WQ_FREEZEABLE = 1 << 0, /* freeze during suspend */ + WQ_SINGLE_THREAD = 1 << 1, /* no per-cpu worker */ +}; extern struct workqueue_struct * -__create_workqueue_key(const char *name, int singlethread, int freezeable, +__create_workqueue_key(const char *name, unsigned int flags, struct lock_class_key *key, const char *lock_name); #ifdef CONFIG_LOCKDEP -#define __create_workqueue(name, singlethread, freezeable) \ +#define __create_workqueue(name, flags) \ ({ \ static struct lock_class_key __key; \ const char *__lock_name; \ @@ -200,19 +204,20 @@ __create_workqueue_key(const char *name, int singlethread, int freezeable, else \ __lock_name = #name; \ \ - __create_workqueue_key((name), (singlethread), \ - (freezeable), &__key, \ + __create_workqueue_key((name), (flags), &__key, \ __lock_name); \ }) #else -#define __create_workqueue(name, singlethread, freezeable) \ - __create_workqueue_key((name), (singlethread), (freezeable), \ - NULL, NULL) +#define __create_workqueue(name, flags) \ + __create_workqueue_key((name), (flags), NULL, NULL) #endif -#define create_workqueue(name) __create_workqueue((name), 0, 0) -#define create_freezeable_workqueue(name) __create_workqueue((name), 1, 1) -#define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0) +#define create_workqueue(name) \ + __create_workqueue((name), 0) +#define create_freezeable_workqueue(name) \ + __create_workqueue((name), WQ_FREEZEABLE | WQ_SINGLE_THREAD) +#define create_singlethread_workqueue(name) \ + __create_workqueue((name), WQ_SINGLE_THREAD) extern void destroy_workqueue(struct workqueue_struct *wq); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index c56146a755e5..68e4dd808ec0 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -67,11 +67,10 @@ struct cpu_workqueue_struct { * per-CPU workqueues: */ struct workqueue_struct { + unsigned int flags; /* I: WQ_* flags */ struct cpu_workqueue_struct *cpu_wq; /* I: cwq's */ struct list_head list; /* W: list of all workqueues */ const char *name; /* I: workqueue name */ - int singlethread; - int freezeable; /* Freeze threads during suspend */ #ifdef CONFIG_LOCKDEP struct lockdep_map lockdep_map; #endif @@ -203,9 +202,9 @@ static const struct cpumask *cpu_singlethread_map __read_mostly; static cpumask_var_t cpu_populated_map __read_mostly; /* If it's single threaded, it isn't in the list of workqueues. 
*/ -static inline int is_wq_single_threaded(struct workqueue_struct *wq) +static inline bool is_wq_single_threaded(struct workqueue_struct *wq) { - return wq->singlethread; + return wq->flags & WQ_SINGLE_THREAD; } static const struct cpumask *wq_cpu_map(struct workqueue_struct *wq) @@ -463,7 +462,7 @@ static int worker_thread(void *__cwq) struct cpu_workqueue_struct *cwq = __cwq; DEFINE_WAIT(wait); - if (cwq->wq->freezeable) + if (cwq->wq->flags & WQ_FREEZEABLE) set_freezable(); for (;;) { @@ -1013,8 +1012,7 @@ static void start_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) } struct workqueue_struct *__create_workqueue_key(const char *name, - int singlethread, - int freezeable, + unsigned int flags, struct lock_class_key *key, const char *lock_name) { @@ -1030,13 +1028,12 @@ struct workqueue_struct *__create_workqueue_key(const char *name, if (!wq->cpu_wq) goto err; + wq->flags = flags; wq->name = name; lockdep_init_map(&wq->lockdep_map, lock_name, key, 0); - wq->singlethread = singlethread; - wq->freezeable = freezeable; INIT_LIST_HEAD(&wq->list); - if (singlethread) { + if (flags & WQ_SINGLE_THREAD) { cwq = init_cpu_workqueue(wq, singlethread_cpu); err = create_workqueue_thread(cwq, singlethread_cpu); start_workqueue_thread(cwq, -1); -- cgit v1.2.3 From 22df02bb3fab24af97bff4c69cc6fd8529fc66fe Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 29 Jun 2010 10:07:10 +0200 Subject: workqueue: define masks for work flags and conditionalize STATIC flags Work flags are about to see more traditional mask handling. Define WORK_STRUCT_*_BIT as the bit position constants and redefine WORK_STRUCT_* as bit masks. Also, make WORK_STRUCT_STATIC_* flags conditional. While at it, re-define these constants as enums and use WORK_STRUCT_STATIC instead of hard-coding 2 in WORK_DATA_STATIC_INIT().
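A standalone sketch of the convention, with invented names: atomic bitops such as test_and_set_bit() want the bit number, while arithmetic on the data word wants the shifted mask, so both forms are kept as separate constants.

	#include <stdio.h>

	#define PENDING_BIT	0			/* bit number, for bitops */
	#define PENDING		(1UL << PENDING_BIT)	/* mask, for arithmetic */
	#define FLAG_MASK	(3UL)
	#define WQ_DATA_MASK	(~FLAG_MASK)

	int main(void)
	{
		unsigned long data = 0x1000;	/* pretend the upper bits hold a cwq pointer */

		data |= PENDING;		/* mask form */
		printf("pending set?  %d\n", !!(data & (1UL << PENDING_BIT)));	/* bit form */
		printf("flag bits:    %#lx\n", data & FLAG_MASK);
		printf("pointer bits: %#lx\n", data & WQ_DATA_MASK);
		return 0;
	}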
Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 29 +++++++++++++++++++++-------- kernel/workqueue.c | 12 ++++++------ 2 files changed, 27 insertions(+), 14 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index d89cfc143b1a..d60c5701ab45 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -22,12 +22,25 @@ typedef void (*work_func_t)(struct work_struct *work); */ #define work_data_bits(work) ((unsigned long *)(&(work)->data)) +enum { + WORK_STRUCT_PENDING_BIT = 0, /* work item is pending execution */ +#ifdef CONFIG_DEBUG_OBJECTS_WORK + WORK_STRUCT_STATIC_BIT = 1, /* static initializer (debugobjects) */ +#endif + + WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT, +#ifdef CONFIG_DEBUG_OBJECTS_WORK + WORK_STRUCT_STATIC = 1 << WORK_STRUCT_STATIC_BIT, +#else + WORK_STRUCT_STATIC = 0, +#endif + + WORK_STRUCT_FLAG_MASK = 3UL, + WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK, +}; + struct work_struct { atomic_long_t data; -#define WORK_STRUCT_PENDING 0 /* T if work item pending execution */ -#define WORK_STRUCT_STATIC 1 /* static initializer (debugobjects) */ -#define WORK_STRUCT_FLAG_MASK (3UL) -#define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK) struct list_head entry; work_func_t func; #ifdef CONFIG_LOCKDEP @@ -36,7 +49,7 @@ struct work_struct { }; #define WORK_DATA_INIT() ATOMIC_LONG_INIT(0) -#define WORK_DATA_STATIC_INIT() ATOMIC_LONG_INIT(2) +#define WORK_DATA_STATIC_INIT() ATOMIC_LONG_INIT(WORK_STRUCT_STATIC) struct delayed_work { struct work_struct work; @@ -98,7 +111,7 @@ extern void __init_work(struct work_struct *work, int onstack); extern void destroy_work_on_stack(struct work_struct *work); static inline unsigned int work_static(struct work_struct *work) { - return *work_data_bits(work) & (1 << WORK_STRUCT_STATIC); + return *work_data_bits(work) & WORK_STRUCT_STATIC; } #else static inline void __init_work(struct work_struct *work, int onstack) { } @@ -167,7 +180,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } * @work: The work item in question */ #define work_pending(work) \ - test_bit(WORK_STRUCT_PENDING, work_data_bits(work)) + test_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)) /** * delayed_work_pending - Find out whether a delayable work item is currently @@ -182,7 +195,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } * @work: The work item in question */ #define work_clear_pending(work) \ - clear_bit(WORK_STRUCT_PENDING, work_data_bits(work)) + clear_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)) enum { WQ_FREEZEABLE = 1 << 0, /* freeze during suspend */ diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 68e4dd808ec0..5c49d762293b 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -115,7 +115,7 @@ static int work_fixup_activate(void *addr, enum debug_obj_state state) * statically initialized. We just make sure that it * is tracked in the object tracker. 
*/ - if (test_bit(WORK_STRUCT_STATIC, work_data_bits(work))) { + if (test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work))) { debug_object_init(work, &work_debug_descr); debug_object_activate(work, &work_debug_descr); return 0; @@ -232,7 +232,7 @@ static inline void set_wq_data(struct work_struct *work, BUG_ON(!work_pending(work)); atomic_long_set(&work->data, (unsigned long)cwq | work_static(work) | - (1UL << WORK_STRUCT_PENDING) | extra_flags); + WORK_STRUCT_PENDING | extra_flags); } /* @@ -330,7 +330,7 @@ queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work) { int ret = 0; - if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) { + if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { __queue_work(cpu, wq, work); ret = 1; } @@ -380,7 +380,7 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, struct timer_list *timer = &dwork->timer; struct work_struct *work = &dwork->work; - if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) { + if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { BUG_ON(timer_pending(timer)); BUG_ON(!list_empty(&work->entry)); @@ -516,7 +516,7 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, * might deadlock. */ INIT_WORK_ON_STACK(&barr->work, wq_barrier_func); - __set_bit(WORK_STRUCT_PENDING, work_data_bits(&barr->work)); + __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work)); init_completion(&barr->done); debug_work_activate(&barr->work); @@ -628,7 +628,7 @@ static int try_to_grab_pending(struct work_struct *work) struct cpu_workqueue_struct *cwq; int ret = -1; - if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work))) + if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) return 0; /* -- cgit v1.2.3 From 0f900049cbe2767d47c2a62b54f0e822e1d66840 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 29 Jun 2010 10:07:11 +0200 Subject: workqueue: update cwq alignment work->data field is used for two purposes. It points to the cwq it's queued on and the lower bits are used for flags. Currently, two bits are reserved, which is always safe as 4 byte alignment is guaranteed on every architecture. However, future changes will need more flag bits. On SMP, the percpu allocator is capable of honoring larger alignment (there are other users which depend on it) and larger alignment works just fine. On UP, the percpu allocator is a thin wrapper around kzalloc/kfree() and doesn't honor alignment requests. This patch introduces WORK_STRUCT_FLAG_BITS and implements alloc/free_cwqs() which guarantee max(1 << WORK_STRUCT_FLAG_BITS, __alignof__(unsigned long long)) alignment both on SMP and UP. On SMP, simply wrapping the percpu allocator is enough. On UP, extra space is allocated so that the cwq can be aligned and the original pointer can be stored after it, which is used in the free path. * The alignment problem on UP was reported by Michal Simek.
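The UP half of the trick can be modelled in ordinary userspace C (malloc/free standing in for the percpu allocator; all names invented for the example): over-allocate, align the object by hand, and stash the original pointer right behind the aligned object so the free path can find it.

	#include <stdio.h>
	#include <stdlib.h>
	#include <stdint.h>
	#include <assert.h>

	#define ALIGN_UP(p, a) \
		((void *)(((uintptr_t)(p) + (a) - 1) & ~((uintptr_t)(a) - 1)))

	struct cwq {			/* stand-in for cpu_workqueue_struct */
		long dummy[4];
	};

	static struct cwq *alloc_aligned_cwq(size_t align)
	{
		void *raw = malloc(sizeof(struct cwq) + align + sizeof(void *));
		struct cwq *cwq;

		if (!raw)
			return NULL;
		cwq = ALIGN_UP(raw, align);
		*(void **)(cwq + 1) = raw;	/* remember what to free */
		return cwq;
	}

	static void free_aligned_cwq(struct cwq *cwq)
	{
		if (cwq)
			free(*(void **)(cwq + 1));	/* original pointer lives after the object */
	}

	int main(void)
	{
		struct cwq *cwq = alloc_aligned_cwq(64);

		if (!cwq)
			return 1;
		assert(((uintptr_t)cwq & 63) == 0);	/* low bits are free for flags */
		printf("aligned cwq at %p\n", (void *)cwq);
		free_aligned_cwq(cwq);
		return 0;
	}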
Signed-off-by: Tejun Heo Cc: Christoph Lameter Cc: Ingo Molnar Cc: Frederic Weisbecker Reported-by: Michal Simek --- include/linux/workqueue.h | 5 +++- kernel/workqueue.c | 60 +++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 59 insertions(+), 6 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index d60c5701ab45..b90958a037dc 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -26,6 +26,9 @@ enum { WORK_STRUCT_PENDING_BIT = 0, /* work item is pending execution */ #ifdef CONFIG_DEBUG_OBJECTS_WORK WORK_STRUCT_STATIC_BIT = 1, /* static initializer (debugobjects) */ + WORK_STRUCT_FLAG_BITS = 2, +#else + WORK_STRUCT_FLAG_BITS = 1, #endif WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT, @@ -35,7 +38,7 @@ enum { WORK_STRUCT_STATIC = 0, #endif - WORK_STRUCT_FLAG_MASK = 3UL, + WORK_STRUCT_FLAG_MASK = (1UL << WORK_STRUCT_FLAG_BITS) - 1, WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK, }; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index dc78956ccf03..74a38499b19a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -46,7 +46,9 @@ /* * The per-CPU workqueue (if single thread, we always use the first - * possible cpu). + * possible cpu). The lower WORK_STRUCT_FLAG_BITS of + * work_struct->data are used for flags and thus cwqs need to be + * aligned at two's power of the number of flag bits. */ struct cpu_workqueue_struct { @@ -59,7 +61,7 @@ struct cpu_workqueue_struct { struct workqueue_struct *wq; /* I: the owning workqueue */ struct task_struct *thread; -} ____cacheline_aligned; +}; /* * The externally visible workqueue abstraction is an array of @@ -967,6 +969,53 @@ int current_is_keventd(void) } +static struct cpu_workqueue_struct *alloc_cwqs(void) +{ + /* + * cwqs are forced aligned according to WORK_STRUCT_FLAG_BITS. + * Make sure that the alignment isn't lower than that of + * unsigned long long. + */ + const size_t size = sizeof(struct cpu_workqueue_struct); + const size_t align = max_t(size_t, 1 << WORK_STRUCT_FLAG_BITS, + __alignof__(unsigned long long)); + struct cpu_workqueue_struct *cwqs; +#ifndef CONFIG_SMP + void *ptr; + + /* + * On UP, percpu allocator doesn't honor alignment parameter + * and simply uses arch-dependent default. Allocate enough + * room to align cwq and put an extra pointer at the end + * pointing back to the originally allocated pointer which + * will be used for free. + * + * FIXME: This really belongs to UP percpu code. Update UP + * percpu code to honor alignment and remove this ugliness. 
+ */ + ptr = __alloc_percpu(size + align + sizeof(void *), 1); + cwqs = PTR_ALIGN(ptr, align); + *(void **)per_cpu_ptr(cwqs + 1, 0) = ptr; +#else + /* On SMP, percpu allocator can do it itself */ + cwqs = __alloc_percpu(size, align); +#endif + /* just in case, make sure it's actually aligned */ + BUG_ON(!IS_ALIGNED((unsigned long)cwqs, align)); + return cwqs; +} + +static void free_cwqs(struct cpu_workqueue_struct *cwqs) +{ +#ifndef CONFIG_SMP + /* on UP, the pointer to free is stored right after the cwq */ + if (cwqs) + free_percpu(*(void **)per_cpu_ptr(cwqs + 1, 0)); +#else + free_percpu(cwqs); +#endif +} + static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) { struct workqueue_struct *wq = cwq->wq; @@ -1012,7 +1061,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name, if (!wq) goto err; - wq->cpu_wq = alloc_percpu(struct cpu_workqueue_struct); + wq->cpu_wq = alloc_cwqs(); if (!wq->cpu_wq) goto err; @@ -1031,6 +1080,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name, for_each_possible_cpu(cpu) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); + BUG_ON((unsigned long)cwq & WORK_STRUCT_FLAG_MASK); cwq->wq = wq; cwq->cpu = cpu; spin_lock_init(&cwq->lock); @@ -1059,7 +1109,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name, return wq; err: if (wq) { - free_percpu(wq->cpu_wq); + free_cwqs(wq->cpu_wq); kfree(wq); } return NULL; @@ -1112,7 +1162,7 @@ void destroy_workqueue(struct workqueue_struct *wq) for_each_possible_cpu(cpu) cleanup_workqueue_thread(get_cwq(cpu, wq)); - free_percpu(wq->cpu_wq); + free_cwqs(wq->cpu_wq); kfree(wq); } EXPORT_SYMBOL_GPL(destroy_workqueue); -- cgit v1.2.3 From 73f53c4aa732eced5fcb1844d3d452c30905f20f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 29 Jun 2010 10:07:11 +0200 Subject: workqueue: reimplement workqueue flushing using color coded works Reimplement workqueue flushing using color coded works. wq has the current work color which is painted on the works being issued via cwqs. Flushing a workqueue is achieved by advancing the current work colors of cwqs and waiting for all the works which have any of the previous colors to drain. Currently there are 16 possible colors, one is reserved for no color and 15 colors are useable allowing 14 concurrent flushes. When color space gets full, flush attempts are batched up and processed together when color frees up, so even with many concurrent flushers, the new implementation won't build up huge queue of flushers which has to be processed one after another. Only works which are queued via __queue_work() are colored. Works which are directly put on queue using insert_work() use NO_COLOR and don't participate in workqueue flushing. Currently only works used for work-specific flush fall in this category. This new implementation leaves only cleanup_workqueue_thread() as the user of flush_cpu_workqueue(). Just make its users use flush_workqueue() and kthread_stop() directly and kill cleanup_workqueue_thread(). As workqueue flushing doesn't use barrier request anymore, the comment describing the complex synchronization around it in cleanup_workqueue_thread() is removed together with the function. This new implementation is to allow having and sharing multiple workers per cpu. Please note that one more bit is reserved for a future work flag by this patch. This is to avoid shifting bits and updating comments later. 
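A minimal sketch of the color arithmetic only (the flusher queueing and wakeup machinery is omitted, and the helper names are invented): the constants mirror the ones introduced above, and next_color() shows why the reserved NO_COLOR value is never handed out to queued work.

	#include <stdio.h>

	#define COLOR_SHIFT	3
	#define COLOR_BITS	4
	#define NR_COLORS	((1 << COLOR_BITS) - 1)	/* 15 usable colors */
	#define NO_COLOR	NR_COLORS		/* works that never flush */

	static unsigned long color_to_flags(int color)
	{
		return (unsigned long)color << COLOR_SHIFT;
	}

	static int flags_to_color(unsigned long data)
	{
		return (data >> COLOR_SHIFT) & ((1 << COLOR_BITS) - 1);
	}

	static int next_color(int color)
	{
		return (color + 1) % NR_COLORS;	/* wraps before reaching NO_COLOR */
	}

	int main(void)
	{
		int c = 13;

		/* colors advance 13 -> 14 -> 0, never touching NO_COLOR (15) */
		for (int i = 0; i < 3; i++) {
			unsigned long data = color_to_flags(c);
			printf("color %2d encoded as %#lx, decoded %d\n",
			       c, data, flags_to_color(data));
			c = next_color(c);
		}
		printf("NO_COLOR = %d\n", NO_COLOR);
		return 0;
	}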
Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 21 ++- kernel/workqueue.c | 355 +++++++++++++++++++++++++++++++++++++++------- 2 files changed, 322 insertions(+), 54 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index b90958a037dc..8762f62103d8 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -26,11 +26,13 @@ enum { WORK_STRUCT_PENDING_BIT = 0, /* work item is pending execution */ #ifdef CONFIG_DEBUG_OBJECTS_WORK WORK_STRUCT_STATIC_BIT = 1, /* static initializer (debugobjects) */ - WORK_STRUCT_FLAG_BITS = 2, + WORK_STRUCT_COLOR_SHIFT = 3, /* color for workqueue flushing */ #else - WORK_STRUCT_FLAG_BITS = 1, + WORK_STRUCT_COLOR_SHIFT = 2, /* color for workqueue flushing */ #endif + WORK_STRUCT_COLOR_BITS = 4, + WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT, #ifdef CONFIG_DEBUG_OBJECTS_WORK WORK_STRUCT_STATIC = 1 << WORK_STRUCT_STATIC_BIT, @@ -38,6 +40,21 @@ enum { WORK_STRUCT_STATIC = 0, #endif + /* + * The last color is no color used for works which don't + * participate in workqueue flushing. + */ + WORK_NR_COLORS = (1 << WORK_STRUCT_COLOR_BITS) - 1, + WORK_NO_COLOR = WORK_NR_COLORS, + + /* + * Reserve 6 bits off of cwq pointer w/ debugobjects turned + * off. This makes cwqs aligned to 64 bytes which isn't too + * excessive while allowing 15 workqueue flush colors. + */ + WORK_STRUCT_FLAG_BITS = WORK_STRUCT_COLOR_SHIFT + + WORK_STRUCT_COLOR_BITS, + WORK_STRUCT_FLAG_MASK = (1UL << WORK_STRUCT_FLAG_BITS) - 1, WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK, }; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 74a38499b19a..56e47c59d73b 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -41,6 +41,8 @@ * * L: cwq->lock protected. Access with cwq->lock held. * + * F: wq->flush_mutex protected. + * * W: workqueue_lock protected. */ @@ -60,9 +62,22 @@ struct cpu_workqueue_struct { unsigned int cpu; struct workqueue_struct *wq; /* I: the owning workqueue */ + int work_color; /* L: current color */ + int flush_color; /* L: flushing color */ + int nr_in_flight[WORK_NR_COLORS]; + /* L: nr of in_flight works */ struct task_struct *thread; }; +/* + * Structure used to wait for workqueue flush. 
+ */ +struct wq_flusher { + struct list_head list; /* F: list of flushers */ + int flush_color; /* F: flush color waiting for */ + struct completion done; /* flush completion */ +}; + /* * The externally visible workqueue abstraction is an array of * per-CPU workqueues: @@ -71,6 +86,15 @@ struct workqueue_struct { unsigned int flags; /* I: WQ_* flags */ struct cpu_workqueue_struct *cpu_wq; /* I: cwq's */ struct list_head list; /* W: list of all workqueues */ + + struct mutex flush_mutex; /* protects wq flushing */ + int work_color; /* F: current work color */ + int flush_color; /* F: current flush color */ + atomic_t nr_cwqs_to_flush; /* flush in progress */ + struct wq_flusher *first_flusher; /* F: first flusher */ + struct list_head flusher_queue; /* F: flush waiters */ + struct list_head flusher_overflow; /* F: flush overflow list */ + const char *name; /* I: workqueue name */ #ifdef CONFIG_LOCKDEP struct lockdep_map lockdep_map; @@ -207,6 +231,22 @@ static struct cpu_workqueue_struct *target_cwq(unsigned int cpu, return get_cwq(cpu, wq); } +static unsigned int work_color_to_flags(int color) +{ + return color << WORK_STRUCT_COLOR_SHIFT; +} + +static int get_work_color(struct work_struct *work) +{ + return (*work_data_bits(work) >> WORK_STRUCT_COLOR_SHIFT) & + ((1 << WORK_STRUCT_COLOR_BITS) - 1); +} + +static int work_next_color(int color) +{ + return (color + 1) % WORK_NR_COLORS; +} + /* * Set the workqueue on which a work item is to be run * - Must *only* be called if the pending flag is set @@ -273,7 +313,9 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, debug_work_activate(work); spin_lock_irqsave(&cwq->lock, flags); BUG_ON(!list_empty(&work->entry)); - insert_work(cwq, work, &cwq->worklist, 0); + cwq->nr_in_flight[cwq->work_color]++; + insert_work(cwq, work, &cwq->worklist, + work_color_to_flags(cwq->work_color)); spin_unlock_irqrestore(&cwq->lock, flags); } @@ -386,6 +428,44 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, } EXPORT_SYMBOL_GPL(queue_delayed_work_on); +/** + * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight + * @cwq: cwq of interest + * @color: color of work which left the queue + * + * A work either has completed or is removed from pending queue, + * decrement nr_in_flight of its cwq and handle workqueue flushing. + * + * CONTEXT: + * spin_lock_irq(cwq->lock). + */ +static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color) +{ + /* ignore uncolored works */ + if (color == WORK_NO_COLOR) + return; + + cwq->nr_in_flight[color]--; + + /* is flush in progress and are we at the flushing tip? */ + if (likely(cwq->flush_color != color)) + return; + + /* are there still in-flight works? */ + if (cwq->nr_in_flight[color]) + return; + + /* this cwq is done, clear flush_color */ + cwq->flush_color = -1; + + /* + * If this was the last cwq, wake up the first flusher. It + * will handle the rest. 
+ */ + if (atomic_dec_and_test(&cwq->wq->nr_cwqs_to_flush)) + complete(&cwq->wq->first_flusher->done); +} + /** * process_one_work - process single work * @cwq: cwq to process work for @@ -404,6 +484,7 @@ static void process_one_work(struct cpu_workqueue_struct *cwq, struct work_struct *work) { work_func_t f = work->func; + int work_color; #ifdef CONFIG_LOCKDEP /* * It is permissible to free the struct work_struct from @@ -417,6 +498,7 @@ static void process_one_work(struct cpu_workqueue_struct *cwq, /* claim and process */ debug_work_deactivate(work); cwq->current_work = work; + work_color = get_work_color(work); list_del_init(&work->entry); spin_unlock_irq(&cwq->lock); @@ -443,6 +525,7 @@ static void process_one_work(struct cpu_workqueue_struct *cwq, /* we're done with it, release */ cwq->current_work = NULL; + cwq_dec_nr_in_flight(cwq, work_color); } static void run_workqueue(struct cpu_workqueue_struct *cwq) @@ -529,29 +612,78 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, init_completion(&barr->done); debug_work_activate(&barr->work); - insert_work(cwq, &barr->work, head, 0); + insert_work(cwq, &barr->work, head, work_color_to_flags(WORK_NO_COLOR)); } -static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq) +/** + * flush_workqueue_prep_cwqs - prepare cwqs for workqueue flushing + * @wq: workqueue being flushed + * @flush_color: new flush color, < 0 for no-op + * @work_color: new work color, < 0 for no-op + * + * Prepare cwqs for workqueue flushing. + * + * If @flush_color is non-negative, flush_color on all cwqs should be + * -1. If no cwq has in-flight commands at the specified color, all + * cwq->flush_color's stay at -1 and %false is returned. If any cwq + * has in flight commands, its cwq->flush_color is set to + * @flush_color, @wq->nr_cwqs_to_flush is updated accordingly, cwq + * wakeup logic is armed and %true is returned. + * + * The caller should have initialized @wq->first_flusher prior to + * calling this function with non-negative @flush_color. If + * @flush_color is negative, no flush color update is done and %false + * is returned. + * + * If @work_color is non-negative, all cwqs should have the same + * work_color which is previous to @work_color and all will be + * advanced to @work_color. + * + * CONTEXT: + * mutex_lock(wq->flush_mutex). + * + * RETURNS: + * %true if @flush_color >= 0 and there's something to flush. %false + * otherwise. 
+ */ +static bool flush_workqueue_prep_cwqs(struct workqueue_struct *wq, + int flush_color, int work_color) { - int active = 0; - struct wq_barrier barr; + bool wait = false; + unsigned int cpu; - WARN_ON(cwq->thread == current); - - spin_lock_irq(&cwq->lock); - if (!list_empty(&cwq->worklist) || cwq->current_work != NULL) { - insert_wq_barrier(cwq, &barr, &cwq->worklist); - active = 1; + if (flush_color >= 0) { + BUG_ON(atomic_read(&wq->nr_cwqs_to_flush)); + atomic_set(&wq->nr_cwqs_to_flush, 1); } - spin_unlock_irq(&cwq->lock); - if (active) { - wait_for_completion(&barr.done); - destroy_work_on_stack(&barr.work); + for_each_possible_cpu(cpu) { + struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); + + spin_lock_irq(&cwq->lock); + + if (flush_color >= 0) { + BUG_ON(cwq->flush_color != -1); + + if (cwq->nr_in_flight[flush_color]) { + cwq->flush_color = flush_color; + atomic_inc(&wq->nr_cwqs_to_flush); + wait = true; + } + } + + if (work_color >= 0) { + BUG_ON(work_color != work_next_color(cwq->work_color)); + cwq->work_color = work_color; + } + + spin_unlock_irq(&cwq->lock); } - return active; + if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_cwqs_to_flush)) + complete(&wq->first_flusher->done); + + return wait; } /** @@ -566,13 +698,143 @@ static int flush_cpu_workqueue(struct cpu_workqueue_struct *cwq) */ void flush_workqueue(struct workqueue_struct *wq) { - int cpu; + struct wq_flusher this_flusher = { + .list = LIST_HEAD_INIT(this_flusher.list), + .flush_color = -1, + .done = COMPLETION_INITIALIZER_ONSTACK(this_flusher.done), + }; + int next_color; - might_sleep(); lock_map_acquire(&wq->lockdep_map); lock_map_release(&wq->lockdep_map); - for_each_possible_cpu(cpu) - flush_cpu_workqueue(get_cwq(cpu, wq)); + + mutex_lock(&wq->flush_mutex); + + /* + * Start-to-wait phase + */ + next_color = work_next_color(wq->work_color); + + if (next_color != wq->flush_color) { + /* + * Color space is not full. The current work_color + * becomes our flush_color and work_color is advanced + * by one. + */ + BUG_ON(!list_empty(&wq->flusher_overflow)); + this_flusher.flush_color = wq->work_color; + wq->work_color = next_color; + + if (!wq->first_flusher) { + /* no flush in progress, become the first flusher */ + BUG_ON(wq->flush_color != this_flusher.flush_color); + + wq->first_flusher = &this_flusher; + + if (!flush_workqueue_prep_cwqs(wq, wq->flush_color, + wq->work_color)) { + /* nothing to flush, done */ + wq->flush_color = next_color; + wq->first_flusher = NULL; + goto out_unlock; + } + } else { + /* wait in queue */ + BUG_ON(wq->flush_color == this_flusher.flush_color); + list_add_tail(&this_flusher.list, &wq->flusher_queue); + flush_workqueue_prep_cwqs(wq, -1, wq->work_color); + } + } else { + /* + * Oops, color space is full, wait on overflow queue. + * The next flush completion will assign us + * flush_color and transfer to flusher_queue. + */ + list_add_tail(&this_flusher.list, &wq->flusher_overflow); + } + + mutex_unlock(&wq->flush_mutex); + + wait_for_completion(&this_flusher.done); + + /* + * Wake-up-and-cascade phase + * + * First flushers are responsible for cascading flushes and + * handling overflow. Non-first flushers can simply return. 
+ */ + if (wq->first_flusher != &this_flusher) + return; + + mutex_lock(&wq->flush_mutex); + + wq->first_flusher = NULL; + + BUG_ON(!list_empty(&this_flusher.list)); + BUG_ON(wq->flush_color != this_flusher.flush_color); + + while (true) { + struct wq_flusher *next, *tmp; + + /* complete all the flushers sharing the current flush color */ + list_for_each_entry_safe(next, tmp, &wq->flusher_queue, list) { + if (next->flush_color != wq->flush_color) + break; + list_del_init(&next->list); + complete(&next->done); + } + + BUG_ON(!list_empty(&wq->flusher_overflow) && + wq->flush_color != work_next_color(wq->work_color)); + + /* this flush_color is finished, advance by one */ + wq->flush_color = work_next_color(wq->flush_color); + + /* one color has been freed, handle overflow queue */ + if (!list_empty(&wq->flusher_overflow)) { + /* + * Assign the same color to all overflowed + * flushers, advance work_color and append to + * flusher_queue. This is the start-to-wait + * phase for these overflowed flushers. + */ + list_for_each_entry(tmp, &wq->flusher_overflow, list) + tmp->flush_color = wq->work_color; + + wq->work_color = work_next_color(wq->work_color); + + list_splice_tail_init(&wq->flusher_overflow, + &wq->flusher_queue); + flush_workqueue_prep_cwqs(wq, -1, wq->work_color); + } + + if (list_empty(&wq->flusher_queue)) { + BUG_ON(wq->flush_color != wq->work_color); + break; + } + + /* + * Need to flush more colors. Make the next flusher + * the new first flusher and arm cwqs. + */ + BUG_ON(wq->flush_color == wq->work_color); + BUG_ON(wq->flush_color != next->flush_color); + + list_del_init(&next->list); + wq->first_flusher = next; + + if (flush_workqueue_prep_cwqs(wq, wq->flush_color, -1)) + break; + + /* + * Meh... this color is already done, clear first + * flusher and repeat cascading. + */ + wq->first_flusher = NULL; + } + +out_unlock: + mutex_unlock(&wq->flush_mutex); } EXPORT_SYMBOL_GPL(flush_workqueue); @@ -659,6 +921,7 @@ static int try_to_grab_pending(struct work_struct *work) if (cwq == get_wq_data(work)) { debug_work_deactivate(work); list_del_init(&work->entry); + cwq_dec_nr_in_flight(cwq, get_work_color(work)); ret = 1; } } @@ -1066,6 +1329,10 @@ struct workqueue_struct *__create_workqueue_key(const char *name, goto err; wq->flags = flags; + mutex_init(&wq->flush_mutex); + atomic_set(&wq->nr_cwqs_to_flush, 0); + INIT_LIST_HEAD(&wq->flusher_queue); + INIT_LIST_HEAD(&wq->flusher_overflow); wq->name = name; lockdep_init_map(&wq->lockdep_map, lock_name, key, 0); INIT_LIST_HEAD(&wq->list); @@ -1083,6 +1350,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name, BUG_ON((unsigned long)cwq & WORK_STRUCT_FLAG_MASK); cwq->wq = wq; cwq->cpu = cpu; + cwq->flush_color = -1; spin_lock_init(&cwq->lock); INIT_LIST_HEAD(&cwq->worklist); init_waitqueue_head(&cwq->more_work); @@ -1116,33 +1384,6 @@ err: } EXPORT_SYMBOL_GPL(__create_workqueue_key); -static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq) -{ - /* - * Our caller is either destroy_workqueue() or CPU_POST_DEAD, - * cpu_add_remove_lock protects cwq->thread. - */ - if (cwq->thread == NULL) - return; - - lock_map_acquire(&cwq->wq->lockdep_map); - lock_map_release(&cwq->wq->lockdep_map); - - flush_cpu_workqueue(cwq); - /* - * If the caller is CPU_POST_DEAD and cwq->worklist was not empty, - * a concurrent flush_workqueue() can insert a barrier after us. - * However, in that case run_workqueue() won't return and check - * kthread_should_stop() until it flushes all work_struct's. 
- * When ->worklist becomes empty it is safe to exit because no - * more work_structs can be queued on this cwq: flush_workqueue - * checks list_empty(), and a "normal" queue_work() can't use - * a dead CPU. - */ - kthread_stop(cwq->thread); - cwq->thread = NULL; -} - /** * destroy_workqueue - safely terminate a workqueue * @wq: target workqueue @@ -1159,8 +1400,20 @@ void destroy_workqueue(struct workqueue_struct *wq) spin_unlock(&workqueue_lock); cpu_maps_update_done(); - for_each_possible_cpu(cpu) - cleanup_workqueue_thread(get_cwq(cpu, wq)); + flush_workqueue(wq); + + for_each_possible_cpu(cpu) { + struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); + int i; + + if (cwq->thread) { + kthread_stop(cwq->thread); + cwq->thread = NULL; + } + + for (i = 0; i < WORK_NR_COLORS; i++) + BUG_ON(cwq->nr_in_flight[i]); + } free_cwqs(wq->cpu_wq); kfree(wq); @@ -1185,9 +1438,7 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, switch (action) { case CPU_POST_DEAD: - lock_map_acquire(&cwq->wq->lockdep_map); - lock_map_release(&cwq->wq->lockdep_map); - flush_cpu_workqueue(cwq); + flush_workqueue(wq); break; } } -- cgit v1.2.3 From affee4b294a0fc97d67c8a77dc080c4dd262a79e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 29 Jun 2010 10:07:12 +0200 Subject: workqueue: reimplement work flushing using linked works A work is linked to the next one by having WORK_STRUCT_LINKED bit set and these links can be chained. When a linked work is dispatched to a worker, all linked works are dispatched to the worker's newly added ->scheduled queue and processed back-to-back. Currently, as there's only single worker per cwq, having linked works doesn't make any visible behavior difference. This change is to prepare for multiple shared workers per cpu. 
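As a toy model of the rule described above (arrays instead of list_heads, invented names): items are pulled onto the worker's private list until one is moved whose LINKED flag is clear, which ends the series.

	#include <stdio.h>
	#include <stdbool.h>

	struct toy_work {
		const char *name;
		bool linked;		/* "the next work is chained to me" */
	};

	static int move_linked_works(const struct toy_work *queue, int n,
				     struct toy_work *sched)
	{
		int moved = 0;

		while (moved < n) {
			sched[moved] = queue[moved];
			moved++;
			if (!queue[moved - 1].linked)
				break;	/* end of the chained series */
		}
		return moved;
	}

	int main(void)
	{
		struct toy_work queue[] = {
			{ "target",        true  },	/* barrier is linked after it */
			{ "flush-barrier", false },	/* chain ends here */
			{ "unrelated",     false },
		};
		struct toy_work sched[3];
		int n = move_linked_works(queue, 3, sched);

		printf("moved %d work(s):", n);
		for (int i = 0; i < n; i++)
			printf(" %s", sched[i].name);
		printf("\n");	/* "unrelated" stays on the queue */
		return 0;
	}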
Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 4 +- kernel/workqueue.c | 152 +++++++++++++++++++++++++++++++++++++++------- 2 files changed, 134 insertions(+), 22 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 8762f62103d8..4f4fdba722c3 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -24,8 +24,9 @@ typedef void (*work_func_t)(struct work_struct *work); enum { WORK_STRUCT_PENDING_BIT = 0, /* work item is pending execution */ + WORK_STRUCT_LINKED_BIT = 1, /* next work is linked to this one */ #ifdef CONFIG_DEBUG_OBJECTS_WORK - WORK_STRUCT_STATIC_BIT = 1, /* static initializer (debugobjects) */ + WORK_STRUCT_STATIC_BIT = 2, /* static initializer (debugobjects) */ WORK_STRUCT_COLOR_SHIFT = 3, /* color for workqueue flushing */ #else WORK_STRUCT_COLOR_SHIFT = 2, /* color for workqueue flushing */ @@ -34,6 +35,7 @@ enum { WORK_STRUCT_COLOR_BITS = 4, WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT, + WORK_STRUCT_LINKED = 1 << WORK_STRUCT_LINKED_BIT, #ifdef CONFIG_DEBUG_OBJECTS_WORK WORK_STRUCT_STATIC = 1 << WORK_STRUCT_STATIC_BIT, #else diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 600db10a4dbf..9953d3c7bd10 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -51,6 +51,7 @@ struct cpu_workqueue_struct; struct worker { struct work_struct *current_work; /* L: work being processed */ + struct list_head scheduled; /* L: scheduled works */ struct task_struct *task; /* I: worker task */ struct cpu_workqueue_struct *cwq; /* I: the associated cwq */ int id; /* I: worker id */ @@ -445,6 +446,8 @@ static struct worker *alloc_worker(void) struct worker *worker; worker = kzalloc(sizeof(*worker), GFP_KERNEL); + if (worker) + INIT_LIST_HEAD(&worker->scheduled); return worker; } @@ -530,6 +533,7 @@ static void destroy_worker(struct worker *worker) /* sanity check frenzy */ BUG_ON(worker->current_work); + BUG_ON(!list_empty(&worker->scheduled)); kthread_stop(worker->task); kfree(worker); @@ -539,6 +543,47 @@ static void destroy_worker(struct worker *worker) spin_unlock(&workqueue_lock); } +/** + * move_linked_works - move linked works to a list + * @work: start of series of works to be scheduled + * @head: target list to append @work to + * @nextp: out paramter for nested worklist walking + * + * Schedule linked works starting from @work to @head. Work series to + * be scheduled starts at @work and includes any consecutive work with + * WORK_STRUCT_LINKED set in its predecessor. + * + * If @nextp is not NULL, it's updated to point to the next work of + * the last scheduled work. This allows move_linked_works() to be + * nested inside outer list_for_each_entry_safe(). + * + * CONTEXT: + * spin_lock_irq(cwq->lock). + */ +static void move_linked_works(struct work_struct *work, struct list_head *head, + struct work_struct **nextp) +{ + struct work_struct *n; + + /* + * Linked worklist will always end before the end of the list, + * use NULL for list head. + */ + list_for_each_entry_safe_from(work, n, NULL, entry) { + list_move_tail(&work->entry, head); + if (!(*work_data_bits(work) & WORK_STRUCT_LINKED)) + break; + } + + /* + * If we're already inside safe list traversal and have moved + * multiple works to the scheduled queue, the next position + * needs to be updated. 
+ */ + if (nextp) + *nextp = n; +} + /** * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight * @cwq: cwq of interest @@ -639,17 +684,25 @@ static void process_one_work(struct worker *worker, struct work_struct *work) cwq_dec_nr_in_flight(cwq, work_color); } -static void run_workqueue(struct worker *worker) +/** + * process_scheduled_works - process scheduled works + * @worker: self + * + * Process all scheduled works. Please note that the scheduled list + * may change while processing a work, so this function repeatedly + * fetches a work from the top and executes it. + * + * CONTEXT: + * spin_lock_irq(cwq->lock) which may be released and regrabbed + * multiple times. + */ +static void process_scheduled_works(struct worker *worker) { - struct cpu_workqueue_struct *cwq = worker->cwq; - - spin_lock_irq(&cwq->lock); - while (!list_empty(&cwq->worklist)) { - struct work_struct *work = list_entry(cwq->worklist.next, + while (!list_empty(&worker->scheduled)) { + struct work_struct *work = list_first_entry(&worker->scheduled, struct work_struct, entry); process_one_work(worker, work); } - spin_unlock_irq(&cwq->lock); } /** @@ -684,7 +737,28 @@ static int worker_thread(void *__worker) get_cpu_mask(cwq->cpu)))) set_cpus_allowed_ptr(worker->task, get_cpu_mask(cwq->cpu)); - run_workqueue(worker); + + spin_lock_irq(&cwq->lock); + + while (!list_empty(&cwq->worklist)) { + struct work_struct *work = + list_first_entry(&cwq->worklist, + struct work_struct, entry); + + if (likely(!(*work_data_bits(work) & + WORK_STRUCT_LINKED))) { + /* optimization path, not strictly necessary */ + process_one_work(worker, work); + if (unlikely(!list_empty(&worker->scheduled))) + process_scheduled_works(worker); + } else { + move_linked_works(work, &worker->scheduled, + NULL); + process_scheduled_works(worker); + } + } + + spin_unlock_irq(&cwq->lock); } return 0; @@ -705,16 +779,33 @@ static void wq_barrier_func(struct work_struct *work) * insert_wq_barrier - insert a barrier work * @cwq: cwq to insert barrier into * @barr: wq_barrier to insert - * @head: insertion point + * @target: target work to attach @barr to + * @worker: worker currently executing @target, NULL if @target is not executing * - * Insert barrier @barr into @cwq before @head. + * @barr is linked to @target such that @barr is completed only after + * @target finishes execution. Please note that the ordering + * guarantee is observed only with respect to @target and on the local + * cpu. + * + * Currently, a queued barrier can't be canceled. This is because + * try_to_grab_pending() can't determine whether the work to be + * grabbed is at the head of the queue and thus can't clear LINKED + * flag of the previous work while there must be a valid next work + * after a work with LINKED flag set. + * + * Note that when @worker is non-NULL, @target may be modified + * underneath us, so we can't reliably determine cwq from @target. * * CONTEXT: * spin_lock_irq(cwq->lock). 
*/ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, - struct wq_barrier *barr, struct list_head *head) + struct wq_barrier *barr, + struct work_struct *target, struct worker *worker) { + struct list_head *head; + unsigned int linked = 0; + /* * debugobject calls are safe here even with cwq->lock locked * as we know for sure that this will not trigger any of the @@ -725,8 +816,24 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work)); init_completion(&barr->done); + /* + * If @target is currently being executed, schedule the + * barrier to the worker; otherwise, put it after @target. + */ + if (worker) + head = worker->scheduled.next; + else { + unsigned long *bits = work_data_bits(target); + + head = target->entry.next; + /* there can already be other linked works, inherit and set */ + linked = *bits & WORK_STRUCT_LINKED; + __set_bit(WORK_STRUCT_LINKED_BIT, bits); + } + debug_work_activate(&barr->work); - insert_work(cwq, &barr->work, head, work_color_to_flags(WORK_NO_COLOR)); + insert_work(cwq, &barr->work, head, + work_color_to_flags(WORK_NO_COLOR) | linked); } /** @@ -964,8 +1071,8 @@ EXPORT_SYMBOL_GPL(flush_workqueue); */ int flush_work(struct work_struct *work) { + struct worker *worker = NULL; struct cpu_workqueue_struct *cwq; - struct list_head *prev; struct wq_barrier barr; might_sleep(); @@ -985,14 +1092,14 @@ int flush_work(struct work_struct *work) smp_rmb(); if (unlikely(cwq != get_wq_data(work))) goto already_gone; - prev = &work->entry; } else { - if (!cwq->worker || cwq->worker->current_work != work) + if (cwq->worker && cwq->worker->current_work == work) + worker = cwq->worker; + if (!worker) goto already_gone; - prev = &cwq->worklist; } - insert_wq_barrier(cwq, &barr, prev->next); + insert_wq_barrier(cwq, &barr, work, worker); spin_unlock_irq(&cwq->lock); wait_for_completion(&barr.done); destroy_work_on_stack(&barr.work); @@ -1048,16 +1155,19 @@ static void wait_on_cpu_work(struct cpu_workqueue_struct *cwq, struct work_struct *work) { struct wq_barrier barr; - int running = 0; + struct worker *worker; spin_lock_irq(&cwq->lock); + + worker = NULL; if (unlikely(cwq->worker && cwq->worker->current_work == work)) { - insert_wq_barrier(cwq, &barr, cwq->worklist.next); - running = 1; + worker = cwq->worker; + insert_wq_barrier(cwq, &barr, work, worker); } + spin_unlock_irq(&cwq->lock); - if (unlikely(running)) { + if (unlikely(worker)) { wait_for_completion(&barr.done); destroy_work_on_stack(&barr.work); } -- cgit v1.2.3 From 1e19ffc63dbbaea7a7d1c63d99c38d3e5a4c7edf Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 29 Jun 2010 10:07:12 +0200 Subject: workqueue: implement per-cwq active work limit Add cwq->nr_active, cwq->max_active and cwq->delayed_work. nr_active counts the number of active works per cwq. A work is active if it's flushable (colored) and is on cwq's worklist. If nr_active reaches max_active, new works are queued on cwq->delayed_work and activated later as works on the cwq complete and decrement nr_active. cwq->max_active can be specified via the new @max_active parameter to __create_workqueue() and is set to 1 for all workqueues for now. As each cwq has only single worker now, this double queueing doesn't cause any behavior difference visible to its users. This will be used to reimplement freeze/thaw and implement shared worker pool. 
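A standalone sketch of the throttling rule, with invented names and no locking: queueing only makes a work active while nr_active < max_active; everything else waits on a delayed list and is promoted one at a time as active works complete.

	#include <stdio.h>

	#define MAX_ACTIVE	2

	static int nr_active;
	static int nr_delayed;

	static void queue_item(int id)
	{
		if (nr_active < MAX_ACTIVE) {
			nr_active++;
			printf("work %d: active (nr_active=%d)\n", id, nr_active);
		} else {
			nr_delayed++;
			printf("work %d: delayed (nr_delayed=%d)\n", id, nr_delayed);
		}
	}

	static void complete_item(void)
	{
		nr_active--;
		if (nr_delayed > 0 && nr_active < MAX_ACTIVE) {
			nr_delayed--;
			nr_active++;	/* promote the first delayed work */
			printf("completed: promoted a delayed work (nr_active=%d)\n",
			       nr_active);
		} else {
			printf("completed (nr_active=%d)\n", nr_active);
		}
	}

	int main(void)
	{
		for (int i = 1; i <= 4; i++)
			queue_item(i);		/* 1 and 2 go active; 3 and 4 are delayed */
		for (int i = 0; i < 4; i++)
			complete_item();
		return 0;
	}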
Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 18 +++++++++--------- kernel/workqueue.c | 39 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 46 insertions(+), 11 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 4f4fdba722c3..eb753b7790e5 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -225,11 +225,11 @@ enum { }; extern struct workqueue_struct * -__create_workqueue_key(const char *name, unsigned int flags, +__create_workqueue_key(const char *name, unsigned int flags, int max_active, struct lock_class_key *key, const char *lock_name); #ifdef CONFIG_LOCKDEP -#define __create_workqueue(name, flags) \ +#define __create_workqueue(name, flags, max_active) \ ({ \ static struct lock_class_key __key; \ const char *__lock_name; \ @@ -239,20 +239,20 @@ __create_workqueue_key(const char *name, unsigned int flags, else \ __lock_name = #name; \ \ - __create_workqueue_key((name), (flags), &__key, \ - __lock_name); \ + __create_workqueue_key((name), (flags), (max_active), \ + &__key, __lock_name); \ }) #else -#define __create_workqueue(name, flags) \ - __create_workqueue_key((name), (flags), NULL, NULL) +#define __create_workqueue(name, flags, max_active) \ + __create_workqueue_key((name), (flags), (max_active), NULL, NULL) #endif #define create_workqueue(name) \ - __create_workqueue((name), 0) + __create_workqueue((name), 0, 1) #define create_freezeable_workqueue(name) \ - __create_workqueue((name), WQ_FREEZEABLE | WQ_SINGLE_THREAD) + __create_workqueue((name), WQ_FREEZEABLE | WQ_SINGLE_THREAD, 1) #define create_singlethread_workqueue(name) \ - __create_workqueue((name), WQ_SINGLE_THREAD) + __create_workqueue((name), WQ_SINGLE_THREAD, 1) extern void destroy_workqueue(struct workqueue_struct *wq); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 9953d3c7bd10..e541b5db67dd 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -77,6 +77,9 @@ struct cpu_workqueue_struct { int flush_color; /* L: flushing color */ int nr_in_flight[WORK_NR_COLORS]; /* L: nr of in_flight works */ + int nr_active; /* L: nr of active works */ + int max_active; /* I: max active works */ + struct list_head delayed_works; /* L: delayed works */ }; /* @@ -321,14 +324,24 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, struct work_struct *work) { struct cpu_workqueue_struct *cwq = target_cwq(cpu, wq); + struct list_head *worklist; unsigned long flags; debug_work_activate(work); + spin_lock_irqsave(&cwq->lock, flags); BUG_ON(!list_empty(&work->entry)); + cwq->nr_in_flight[cwq->work_color]++; - insert_work(cwq, work, &cwq->worklist, - work_color_to_flags(cwq->work_color)); + + if (likely(cwq->nr_active < cwq->max_active)) { + cwq->nr_active++; + worklist = &cwq->worklist; + } else + worklist = &cwq->delayed_works; + + insert_work(cwq, work, worklist, work_color_to_flags(cwq->work_color)); + spin_unlock_irqrestore(&cwq->lock, flags); } @@ -584,6 +597,15 @@ static void move_linked_works(struct work_struct *work, struct list_head *head, *nextp = n; } +static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) +{ + struct work_struct *work = list_first_entry(&cwq->delayed_works, + struct work_struct, entry); + + move_linked_works(work, &cwq->worklist, NULL); + cwq->nr_active++; +} + /** * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight * @cwq: cwq of interest @@ -602,6 +624,12 @@ static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color) 
return; cwq->nr_in_flight[color]--; + cwq->nr_active--; + + /* one down, submit a delayed one */ + if (!list_empty(&cwq->delayed_works) && + cwq->nr_active < cwq->max_active) + cwq_activate_first_delayed(cwq); /* is flush in progress and are we at the flushing tip? */ if (likely(cwq->flush_color != color)) @@ -1505,6 +1533,7 @@ static void free_cwqs(struct cpu_workqueue_struct *cwqs) struct workqueue_struct *__create_workqueue_key(const char *name, unsigned int flags, + int max_active, struct lock_class_key *key, const char *lock_name) { @@ -1513,6 +1542,8 @@ struct workqueue_struct *__create_workqueue_key(const char *name, bool failed = false; unsigned int cpu; + max_active = clamp_val(max_active, 1, INT_MAX); + wq = kzalloc(sizeof(*wq), GFP_KERNEL); if (!wq) goto err; @@ -1544,8 +1575,10 @@ struct workqueue_struct *__create_workqueue_key(const char *name, cwq->cpu = cpu; cwq->wq = wq; cwq->flush_color = -1; + cwq->max_active = max_active; spin_lock_init(&cwq->lock); INIT_LIST_HEAD(&cwq->worklist); + INIT_LIST_HEAD(&cwq->delayed_works); init_waitqueue_head(&cwq->more_work); if (failed) @@ -1607,6 +1640,8 @@ void destroy_workqueue(struct workqueue_struct *wq) for (i = 0; i < WORK_NR_COLORS; i++) BUG_ON(cwq->nr_in_flight[i]); + BUG_ON(cwq->nr_active); + BUG_ON(!list_empty(&cwq->delayed_works)); } free_cwqs(wq->cpu_wq); -- cgit v1.2.3 From a0a1a5fd4fb15ec61117c759fe9f5c16c53d9e9c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 29 Jun 2010 10:07:12 +0200 Subject: workqueue: reimplement workqueue freeze using max_active Currently, workqueue freezing is implemented by marking the worker freezeable and calling try_to_freeze() from dispatch loop. Reimplement it using cwq->limit so that the workqueue is frozen instead of the worker. * workqueue_struct->saved_max_active is added which stores the specified max_active on initialization. * On freeze, all cwq->max_active's are quenched to zero. Freezing is complete when nr_active on all cwqs reach zero. * On thaw, all cwq->max_active's are restored to wq->saved_max_active and the worklist is repopulated. This new implementation allows having single shared pool of workers per cpu. 
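The freeze/thaw cycle can be sketched the same way (single queue, no locking, invented names): freezing zeroes max_active so nothing new becomes active, the busy test just looks at nr_active, and thawing restores the saved limit and re-admits the delayed works.

	#include <stdio.h>
	#include <stdbool.h>

	struct toy_wq {
		int max_active;
		int saved_max_active;
		int nr_active;
		int nr_delayed;
		bool freezeable;
	};

	static void freeze_begin(struct toy_wq *wq)
	{
		if (wq->freezeable)
			wq->max_active = 0;	/* new work can only be delayed */
	}

	static bool freeze_busy(const struct toy_wq *wq)
	{
		return wq->freezeable && wq->nr_active > 0;
	}

	static void thaw(struct toy_wq *wq)
	{
		wq->max_active = wq->saved_max_active;
		while (wq->nr_delayed && wq->nr_active < wq->max_active) {
			wq->nr_delayed--;
			wq->nr_active++;	/* repopulate the worklist */
		}
	}

	int main(void)
	{
		struct toy_wq wq = { .max_active = 1, .saved_max_active = 1,
				     .nr_active = 1, .nr_delayed = 2,
				     .freezeable = true };

		freeze_begin(&wq);
		printf("busy right after freeze? %d\n", freeze_busy(&wq));
		wq.nr_active = 0;		/* last active work finishes */
		printf("busy now? %d\n", freeze_busy(&wq));
		thaw(&wq);
		printf("after thaw: active=%d delayed=%d\n", wq.nr_active, wq.nr_delayed);
		return 0;
	}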
Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 7 ++ kernel/power/process.c | 21 +++++- kernel/workqueue.c | 163 +++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 179 insertions(+), 12 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index eb753b7790e5..ab0b7fb99bc2 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -340,4 +340,11 @@ static inline long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) #else long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg); #endif /* CONFIG_SMP */ + +#ifdef CONFIG_FREEZER +extern void freeze_workqueues_begin(void); +extern bool freeze_workqueues_busy(void); +extern void thaw_workqueues(void); +#endif /* CONFIG_FREEZER */ + #endif diff --git a/kernel/power/process.c b/kernel/power/process.c index 71ae29052ab6..028a99598f49 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -15,6 +15,7 @@ #include #include #include +#include /* * Timeout for stopping processes @@ -35,6 +36,7 @@ static int try_to_freeze_tasks(bool sig_only) struct task_struct *g, *p; unsigned long end_time; unsigned int todo; + bool wq_busy = false; struct timeval start, end; u64 elapsed_csecs64; unsigned int elapsed_csecs; @@ -42,6 +44,10 @@ static int try_to_freeze_tasks(bool sig_only) do_gettimeofday(&start); end_time = jiffies + TIMEOUT; + + if (!sig_only) + freeze_workqueues_begin(); + while (true) { todo = 0; read_lock(&tasklist_lock); @@ -63,6 +69,12 @@ static int try_to_freeze_tasks(bool sig_only) todo++; } while_each_thread(g, p); read_unlock(&tasklist_lock); + + if (!sig_only) { + wq_busy = freeze_workqueues_busy(); + todo += wq_busy; + } + if (!todo || time_after(jiffies, end_time)) break; @@ -86,8 +98,12 @@ static int try_to_freeze_tasks(bool sig_only) */ printk("\n"); printk(KERN_ERR "Freezing of tasks failed after %d.%02d seconds " - "(%d tasks refusing to freeze):\n", - elapsed_csecs / 100, elapsed_csecs % 100, todo); + "(%d tasks refusing to freeze, wq_busy=%d):\n", + elapsed_csecs / 100, elapsed_csecs % 100, + todo - wq_busy, wq_busy); + + thaw_workqueues(); + read_lock(&tasklist_lock); do_each_thread(g, p) { task_lock(p); @@ -157,6 +173,7 @@ void thaw_processes(void) oom_killer_enable(); printk("Restarting tasks ... "); + thaw_workqueues(); thaw_tasks(true); thaw_tasks(false); schedule(); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index e541b5db67dd..4d059c532792 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -78,7 +78,7 @@ struct cpu_workqueue_struct { int nr_in_flight[WORK_NR_COLORS]; /* L: nr of in_flight works */ int nr_active; /* L: nr of active works */ - int max_active; /* I: max active works */ + int max_active; /* L: max active works */ struct list_head delayed_works; /* L: delayed works */ }; @@ -108,6 +108,7 @@ struct workqueue_struct { struct list_head flusher_queue; /* F: flush waiters */ struct list_head flusher_overflow; /* F: flush overflow list */ + int saved_max_active; /* I: saved cwq max_active */ const char *name; /* I: workqueue name */ #ifdef CONFIG_LOCKDEP struct lockdep_map lockdep_map; @@ -228,6 +229,7 @@ static inline void debug_work_deactivate(struct work_struct *work) { } static DEFINE_SPINLOCK(workqueue_lock); static LIST_HEAD(workqueues); static DEFINE_PER_CPU(struct ida, worker_ida); +static bool workqueue_freezing; /* W: have wqs started freezing? 
*/ static int worker_thread(void *__worker); @@ -745,19 +747,13 @@ static int worker_thread(void *__worker) struct cpu_workqueue_struct *cwq = worker->cwq; DEFINE_WAIT(wait); - if (cwq->wq->flags & WQ_FREEZEABLE) - set_freezable(); - for (;;) { prepare_to_wait(&cwq->more_work, &wait, TASK_INTERRUPTIBLE); - if (!freezing(current) && - !kthread_should_stop() && + if (!kthread_should_stop() && list_empty(&cwq->worklist)) schedule(); finish_wait(&cwq->more_work, &wait); - try_to_freeze(); - if (kthread_should_stop()) break; @@ -1553,6 +1549,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name, goto err; wq->flags = flags; + wq->saved_max_active = max_active; mutex_init(&wq->flush_mutex); atomic_set(&wq->nr_cwqs_to_flush, 0); INIT_LIST_HEAD(&wq->flusher_queue); @@ -1591,8 +1588,19 @@ struct workqueue_struct *__create_workqueue_key(const char *name, failed = true; } + /* + * workqueue_lock protects global freeze state and workqueues + * list. Grab it, set max_active accordingly and add the new + * workqueue to workqueues list. + */ spin_lock(&workqueue_lock); + + if (workqueue_freezing && wq->flags & WQ_FREEZEABLE) + for_each_possible_cpu(cpu) + get_cwq(cpu, wq)->max_active = 0; + list_add(&wq->list, &workqueues); + spin_unlock(&workqueue_lock); cpu_maps_update_done(); @@ -1621,14 +1629,18 @@ void destroy_workqueue(struct workqueue_struct *wq) { int cpu; + flush_workqueue(wq); + + /* + * wq list is used to freeze wq, remove from list after + * flushing is complete in case freeze races us. + */ cpu_maps_update_begin(); spin_lock(&workqueue_lock); list_del(&wq->list); spin_unlock(&workqueue_lock); cpu_maps_update_done(); - flush_workqueue(wq); - for_each_possible_cpu(cpu) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); int i; @@ -1722,6 +1734,137 @@ long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) EXPORT_SYMBOL_GPL(work_on_cpu); #endif /* CONFIG_SMP */ +#ifdef CONFIG_FREEZER + +/** + * freeze_workqueues_begin - begin freezing workqueues + * + * Start freezing workqueues. After this function returns, all + * freezeable workqueues will queue new works to their frozen_works + * list instead of the cwq ones. + * + * CONTEXT: + * Grabs and releases workqueue_lock and cwq->lock's. + */ +void freeze_workqueues_begin(void) +{ + struct workqueue_struct *wq; + unsigned int cpu; + + spin_lock(&workqueue_lock); + + BUG_ON(workqueue_freezing); + workqueue_freezing = true; + + for_each_possible_cpu(cpu) { + list_for_each_entry(wq, &workqueues, list) { + struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); + + spin_lock_irq(&cwq->lock); + + if (wq->flags & WQ_FREEZEABLE) + cwq->max_active = 0; + + spin_unlock_irq(&cwq->lock); + } + } + + spin_unlock(&workqueue_lock); +} + +/** + * freeze_workqueues_busy - are freezeable workqueues still busy? + * + * Check whether freezing is complete. This function must be called + * between freeze_workqueues_begin() and thaw_workqueues(). + * + * CONTEXT: + * Grabs and releases workqueue_lock. + * + * RETURNS: + * %true if some freezeable workqueues are still busy. %false if + * freezing is complete. + */ +bool freeze_workqueues_busy(void) +{ + struct workqueue_struct *wq; + unsigned int cpu; + bool busy = false; + + spin_lock(&workqueue_lock); + + BUG_ON(!workqueue_freezing); + + for_each_possible_cpu(cpu) { + /* + * nr_active is monotonically decreasing. It's safe + * to peek without lock. 
+ */ + list_for_each_entry(wq, &workqueues, list) { + struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); + + if (!(wq->flags & WQ_FREEZEABLE)) + continue; + + BUG_ON(cwq->nr_active < 0); + if (cwq->nr_active) { + busy = true; + goto out_unlock; + } + } + } +out_unlock: + spin_unlock(&workqueue_lock); + return busy; +} + +/** + * thaw_workqueues - thaw workqueues + * + * Thaw workqueues. Normal queueing is restored and all collected + * frozen works are transferred to their respective cwq worklists. + * + * CONTEXT: + * Grabs and releases workqueue_lock and cwq->lock's. + */ +void thaw_workqueues(void) +{ + struct workqueue_struct *wq; + unsigned int cpu; + + spin_lock(&workqueue_lock); + + if (!workqueue_freezing) + goto out_unlock; + + for_each_possible_cpu(cpu) { + list_for_each_entry(wq, &workqueues, list) { + struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); + + if (!(wq->flags & WQ_FREEZEABLE)) + continue; + + spin_lock_irq(&cwq->lock); + + /* restore max_active and repopulate worklist */ + cwq->max_active = wq->saved_max_active; + + while (!list_empty(&cwq->delayed_works) && + cwq->nr_active < cwq->max_active) + cwq_activate_first_delayed(cwq); + + wake_up(&cwq->more_work); + + spin_unlock_irq(&cwq->lock); + } + } + + workqueue_freezing = false; +out_unlock: + spin_unlock(&workqueue_lock); +} +#endif /* CONFIG_FREEZER */ + void __init init_workqueues(void) { unsigned int cpu; -- cgit v1.2.3 From 502ca9d819792e7d79b6e002afe9094c641fe410 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 29 Jun 2010 10:07:13 +0200 Subject: workqueue: make single thread workqueue shared worker pool friendly Reimplement st (single thread) workqueue so that it's friendly to shared worker pool. It was originally implemented by confining st workqueues to use cwq of a fixed cpu and always having a worker for the cpu. This implementation isn't very friendly to shared worker pool and suboptimal in that it ends up crossing cpu boundaries often. Reimplement st workqueue using dynamic single cpu binding and cwq->limit. WQ_SINGLE_THREAD is replaced with WQ_SINGLE_CPU. In a single cpu workqueue, at most single cwq is bound to the wq at any given time. Arbitration is done using atomic accesses to wq->single_cpu when queueing a work. Once bound, the binding stays till the workqueue is drained. Note that the binding is never broken while a workqueue is frozen. This is because idle cwqs may have works waiting in delayed_works queue while frozen. On thaw, the cwq is restarted if there are any delayed works or unbound otherwise. When combined with max_active limit of 1, single cpu workqueue has exactly the same execution properties as the original single thread workqueue while allowing sharing of per-cpu workers. 
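A userspace model of the arbitration step only (C11 atomics standing in for cmpxchg() and the gcwq locks; names invented): the first queuer claims wq->single_cpu with a compare-and-swap, later queuers follow the recorded cpu, and the binding is dropped once the queue drains.

	#include <stdio.h>
	#include <stdatomic.h>

	#define UNBOUND	-1

	static _Atomic int single_cpu = UNBOUND;

	static int pick_cpu(int requested_cpu)
	{
		for (;;) {
			int cpu = atomic_load(&single_cpu);

			if (cpu == UNBOUND) {
				int expected = UNBOUND;
				/* try to become the binding cpu */
				if (atomic_compare_exchange_strong(&single_cpu,
								   &expected,
								   requested_cpu))
					return requested_cpu;
				continue;	/* lost the race, retry */
			}
			return cpu;		/* already bound, follow it */
		}
	}

	static void unbind_when_empty(void)
	{
		atomic_store(&single_cpu, UNBOUND);
	}

	int main(void)
	{
		printf("first queueing from cpu 2 runs on cpu %d\n", pick_cpu(2));
		printf("later queueing from cpu 5 runs on cpu %d\n", pick_cpu(5));
		unbind_when_empty();		/* queue drained, binding released */
		printf("after drain, cpu 5 queueing runs on cpu %d\n", pick_cpu(5));
		return 0;
	}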
Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 6 +-- kernel/workqueue.c | 135 ++++++++++++++++++++++++++++++++++------------ 2 files changed, 103 insertions(+), 38 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index ab0b7fb99bc2..10611f7fc809 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -221,7 +221,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } enum { WQ_FREEZEABLE = 1 << 0, /* freeze during suspend */ - WQ_SINGLE_THREAD = 1 << 1, /* no per-cpu worker */ + WQ_SINGLE_CPU = 1 << 1, /* only single cpu at a time */ }; extern struct workqueue_struct * @@ -250,9 +250,9 @@ __create_workqueue_key(const char *name, unsigned int flags, int max_active, #define create_workqueue(name) \ __create_workqueue((name), 0, 1) #define create_freezeable_workqueue(name) \ - __create_workqueue((name), WQ_FREEZEABLE | WQ_SINGLE_THREAD, 1) + __create_workqueue((name), WQ_FREEZEABLE | WQ_SINGLE_CPU, 1) #define create_singlethread_workqueue(name) \ - __create_workqueue((name), WQ_SINGLE_THREAD, 1) + __create_workqueue((name), WQ_SINGLE_CPU, 1) extern void destroy_workqueue(struct workqueue_struct *wq); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index f57855f718d7..cfb8aa567e17 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -114,8 +114,7 @@ struct global_cwq { } ____cacheline_aligned_in_smp; /* - * The per-CPU workqueue (if single thread, we always use the first - * possible cpu). The lower WORK_STRUCT_FLAG_BITS of + * The per-CPU workqueue. The lower WORK_STRUCT_FLAG_BITS of * work_struct->data are used for flags and thus cwqs need to be * aligned at two's power of the number of flag bits. */ @@ -159,6 +158,8 @@ struct workqueue_struct { struct list_head flusher_queue; /* F: flush waiters */ struct list_head flusher_overflow; /* F: flush overflow list */ + unsigned long single_cpu; /* cpu for single cpu wq */ + int saved_max_active; /* I: saved cwq max_active */ const char *name; /* I: workqueue name */ #ifdef CONFIG_LOCKDEP @@ -289,8 +290,6 @@ static DEFINE_PER_CPU(struct global_cwq, global_cwq); static int worker_thread(void *__worker); -static int singlethread_cpu __read_mostly; - static struct global_cwq *get_gcwq(unsigned int cpu) { return &per_cpu(global_cwq, cpu); @@ -302,14 +301,6 @@ static struct cpu_workqueue_struct *get_cwq(unsigned int cpu, return per_cpu_ptr(wq->cpu_wq, cpu); } -static struct cpu_workqueue_struct *target_cwq(unsigned int cpu, - struct workqueue_struct *wq) -{ - if (unlikely(wq->flags & WQ_SINGLE_THREAD)) - cpu = singlethread_cpu; - return get_cwq(cpu, wq); -} - static unsigned int work_color_to_flags(int color) { return color << WORK_STRUCT_COLOR_SHIFT; @@ -410,17 +401,87 @@ static void insert_work(struct cpu_workqueue_struct *cwq, wake_up_process(cwq->worker->task); } +/** + * cwq_unbind_single_cpu - unbind cwq from single cpu workqueue processing + * @cwq: cwq to unbind + * + * Try to unbind @cwq from single cpu workqueue processing. If + * @cwq->wq is frozen, unbind is delayed till the workqueue is thawed. + * + * CONTEXT: + * spin_lock_irq(gcwq->lock). + */ +static void cwq_unbind_single_cpu(struct cpu_workqueue_struct *cwq) +{ + struct workqueue_struct *wq = cwq->wq; + struct global_cwq *gcwq = cwq->gcwq; + + BUG_ON(wq->single_cpu != gcwq->cpu); + /* + * Unbind from workqueue if @cwq is not frozen. If frozen, + * thaw_workqueues() will either restart processing on this + * cpu or unbind if empty. 
This keeps works queued while + * frozen fully ordered and flushable. + */ + if (likely(!(gcwq->flags & GCWQ_FREEZING))) { + smp_wmb(); /* paired with cmpxchg() in __queue_work() */ + wq->single_cpu = NR_CPUS; + } +} + static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, struct work_struct *work) { - struct cpu_workqueue_struct *cwq = target_cwq(cpu, wq); - struct global_cwq *gcwq = cwq->gcwq; + struct global_cwq *gcwq; + struct cpu_workqueue_struct *cwq; struct list_head *worklist; unsigned long flags; + bool arbitrate; debug_work_activate(work); - spin_lock_irqsave(&gcwq->lock, flags); + /* determine gcwq to use */ + if (!(wq->flags & WQ_SINGLE_CPU)) { + /* just use the requested cpu for multicpu workqueues */ + gcwq = get_gcwq(cpu); + spin_lock_irqsave(&gcwq->lock, flags); + } else { + unsigned int req_cpu = cpu; + + /* + * It's a bit more complex for single cpu workqueues. + * We first need to determine which cpu is going to be + * used. If no cpu is currently serving this + * workqueue, arbitrate using atomic accesses to + * wq->single_cpu; otherwise, use the current one. + */ + retry: + cpu = wq->single_cpu; + arbitrate = cpu == NR_CPUS; + if (arbitrate) + cpu = req_cpu; + + gcwq = get_gcwq(cpu); + spin_lock_irqsave(&gcwq->lock, flags); + + /* + * The following cmpxchg() is a full barrier paired + * with smp_wmb() in cwq_unbind_single_cpu() and + * guarantees that all changes to wq->st_* fields are + * visible on the new cpu after this point. + */ + if (arbitrate) + cmpxchg(&wq->single_cpu, NR_CPUS, cpu); + + if (unlikely(wq->single_cpu != cpu)) { + spin_unlock_irqrestore(&gcwq->lock, flags); + goto retry; + } + } + + /* gcwq determined, get cwq and queue */ + cwq = get_cwq(gcwq->cpu, wq); + BUG_ON(!list_empty(&work->entry)); cwq->nr_in_flight[cwq->work_color]++; @@ -530,7 +591,7 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, timer_stats_timer_set_start_info(&dwork->timer); /* This stores cwq for the moment, for the timer_fn */ - set_wq_data(work, target_cwq(raw_smp_processor_id(), wq), 0); + set_wq_data(work, get_cwq(raw_smp_processor_id(), wq), 0); timer->expires = jiffies + delay; timer->data = (unsigned long)dwork; timer->function = delayed_work_timer_fn; @@ -790,10 +851,14 @@ static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color) cwq->nr_in_flight[color]--; cwq->nr_active--; - /* one down, submit a delayed one */ - if (!list_empty(&cwq->delayed_works) && - cwq->nr_active < cwq->max_active) - cwq_activate_first_delayed(cwq); + if (!list_empty(&cwq->delayed_works)) { + /* one down, submit a delayed one */ + if (cwq->nr_active < cwq->max_active) + cwq_activate_first_delayed(cwq); + } else if (!cwq->nr_active && cwq->wq->flags & WQ_SINGLE_CPU) { + /* this was the last work, unbind from single cpu */ + cwq_unbind_single_cpu(cwq); + } /* is flush in progress and are we at the flushing tip? 
*/ if (likely(cwq->flush_color != color)) @@ -1727,7 +1792,6 @@ struct workqueue_struct *__create_workqueue_key(const char *name, struct lock_class_key *key, const char *lock_name) { - bool singlethread = flags & WQ_SINGLE_THREAD; struct workqueue_struct *wq; bool failed = false; unsigned int cpu; @@ -1748,6 +1812,8 @@ struct workqueue_struct *__create_workqueue_key(const char *name, atomic_set(&wq->nr_cwqs_to_flush, 0); INIT_LIST_HEAD(&wq->flusher_queue); INIT_LIST_HEAD(&wq->flusher_overflow); + wq->single_cpu = NR_CPUS; + wq->name = name; lockdep_init_map(&wq->lockdep_map, lock_name, key, 0); INIT_LIST_HEAD(&wq->list); @@ -1773,8 +1839,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name, if (failed) continue; - cwq->worker = create_worker(cwq, - cpu_online(cpu) && !singlethread); + cwq->worker = create_worker(cwq, cpu_online(cpu)); if (cwq->worker) start_worker(cwq->worker); else @@ -1958,18 +2023,16 @@ static int __cpuinit trustee_thread(void *__gcwq) spin_lock_irq(&gcwq->lock); /* - * Make all multithread workers rogue. Trustee must be bound - * to the target cpu and can't be cancelled. + * Make all workers rogue. Trustee must be bound to the + * target cpu and can't be cancelled. */ BUG_ON(gcwq->cpu != smp_processor_id()); list_for_each_entry(worker, &gcwq->idle_list, entry) - if (!(worker->cwq->wq->flags & WQ_SINGLE_THREAD)) - worker->flags |= WORKER_ROGUE; + worker->flags |= WORKER_ROGUE; for_each_busy_worker(worker, i, pos, gcwq) - if (!(worker->cwq->wq->flags & WQ_SINGLE_THREAD)) - worker->flags |= WORKER_ROGUE; + worker->flags |= WORKER_ROGUE; /* * We're now in charge. Notify and proceed to drain. We need @@ -2074,14 +2137,12 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, wait_trustee_state(gcwq, TRUSTEE_DONE); } - /* clear ROGUE from all multithread workers */ + /* clear ROGUE from all workers */ list_for_each_entry(worker, &gcwq->idle_list, entry) - if (!(worker->cwq->wq->flags & WQ_SINGLE_THREAD)) - worker->flags &= ~WORKER_ROGUE; + worker->flags &= ~WORKER_ROGUE; for_each_busy_worker(worker, i, pos, gcwq) - if (!(worker->cwq->wq->flags & WQ_SINGLE_THREAD)) - worker->flags &= ~WORKER_ROGUE; + worker->flags &= ~WORKER_ROGUE; break; } @@ -2266,6 +2327,11 @@ void thaw_workqueues(void) cwq->nr_active < cwq->max_active) cwq_activate_first_delayed(cwq); + /* perform delayed unbind from single cpu if empty */ + if (wq->single_cpu == gcwq->cpu && + !cwq->nr_active && list_empty(&cwq->delayed_works)) + cwq_unbind_single_cpu(cwq); + wake_up_process(cwq->worker->task); } @@ -2283,7 +2349,6 @@ void __init init_workqueues(void) unsigned int cpu; int i; - singlethread_cpu = cpumask_first(cpu_possible_mask); hotcpu_notifier(workqueue_cpu_callback, CPU_PRI_WORKQUEUE); /* initialize gcwqs */ -- cgit v1.2.3 From 7a22ad757ec75186ad43a5b4670fa7423ee8f480 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 29 Jun 2010 10:07:13 +0200 Subject: workqueue: carry cpu number in work data once execution starts To implement non-reentrant workqueue, the last gcwq a work was executed on must be reliably obtainable as long as the work structure is valid even if the previous workqueue has been destroyed. To achieve this, work->data will be overloaded to carry the last cpu number once execution starts so that the previous gcwq can be located reliably. This means that cwq can't be obtained from work after execution starts but only gcwq. 
Implement set_work_{cwq|cpu}(), get_work_[g]cwq() and clear_work_data() to set work data to the cpu number when starting execution, access the overloaded work data and clear it after cancellation. queue_delayed_work_on() is updated to preserve the last cpu while in-flight in timer and other callers which depended on getting cwq from work after execution starts are converted to depend on gcwq instead. * Anton Blanchard fixed compile error on powerpc due to missing linux/threads.h include. Signed-off-by: Tejun Heo Cc: Anton Blanchard --- include/linux/workqueue.h | 7 +- kernel/workqueue.c | 163 +++++++++++++++++++++++++++++----------------- 2 files changed, 109 insertions(+), 61 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 10611f7fc809..0a7814131e66 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -9,6 +9,7 @@ #include #include #include +#include #include struct workqueue_struct; @@ -59,6 +60,7 @@ enum { WORK_STRUCT_FLAG_MASK = (1UL << WORK_STRUCT_FLAG_BITS) - 1, WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK, + WORK_STRUCT_NO_CPU = NR_CPUS << WORK_STRUCT_FLAG_BITS, }; struct work_struct { @@ -70,8 +72,9 @@ struct work_struct { #endif }; -#define WORK_DATA_INIT() ATOMIC_LONG_INIT(0) -#define WORK_DATA_STATIC_INIT() ATOMIC_LONG_INIT(WORK_STRUCT_STATIC) +#define WORK_DATA_INIT() ATOMIC_LONG_INIT(WORK_STRUCT_NO_CPU) +#define WORK_DATA_STATIC_INIT() \ + ATOMIC_LONG_INIT(WORK_STRUCT_NO_CPU | WORK_STRUCT_STATIC) struct delayed_work { struct work_struct work; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index c276dec75ea4..c68277c204ab 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -319,31 +319,71 @@ static int work_next_color(int color) } /* - * Set the workqueue on which a work item is to be run - * - Must *only* be called if the pending flag is set + * Work data points to the cwq while a work is on queue. Once + * execution starts, it points to the cpu the work was last on. This + * can be distinguished by comparing the data value against + * PAGE_OFFSET. + * + * set_work_{cwq|cpu}() and clear_work_data() can be used to set the + * cwq, cpu or clear work->data. These functions should only be + * called while the work is owned - ie. while the PENDING bit is set. + * + * get_work_[g]cwq() can be used to obtain the gcwq or cwq + * corresponding to a work. gcwq is available once the work has been + * queued anywhere after initialization. cwq is available only from + * queueing until execution starts. */ -static inline void set_wq_data(struct work_struct *work, - struct cpu_workqueue_struct *cwq, - unsigned long extra_flags) +static inline void set_work_data(struct work_struct *work, unsigned long data, + unsigned long flags) { BUG_ON(!work_pending(work)); + atomic_long_set(&work->data, data | flags | work_static(work)); +} - atomic_long_set(&work->data, (unsigned long)cwq | work_static(work) | - WORK_STRUCT_PENDING | extra_flags); +static void set_work_cwq(struct work_struct *work, + struct cpu_workqueue_struct *cwq, + unsigned long extra_flags) +{ + set_work_data(work, (unsigned long)cwq, + WORK_STRUCT_PENDING | extra_flags); } -/* - * Clear WORK_STRUCT_PENDING and the workqueue on which it was queued. 
- */ -static inline void clear_wq_data(struct work_struct *work) +static void set_work_cpu(struct work_struct *work, unsigned int cpu) +{ + set_work_data(work, cpu << WORK_STRUCT_FLAG_BITS, WORK_STRUCT_PENDING); +} + +static void clear_work_data(struct work_struct *work) +{ + set_work_data(work, WORK_STRUCT_NO_CPU, 0); +} + +static inline unsigned long get_work_data(struct work_struct *work) +{ + return atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK; +} + +static struct cpu_workqueue_struct *get_work_cwq(struct work_struct *work) { - atomic_long_set(&work->data, work_static(work)); + unsigned long data = get_work_data(work); + + return data >= PAGE_OFFSET ? (void *)data : NULL; } -static inline struct cpu_workqueue_struct *get_wq_data(struct work_struct *work) +static struct global_cwq *get_work_gcwq(struct work_struct *work) { - return (void *)(atomic_long_read(&work->data) & - WORK_STRUCT_WQ_DATA_MASK); + unsigned long data = get_work_data(work); + unsigned int cpu; + + if (data >= PAGE_OFFSET) + return ((struct cpu_workqueue_struct *)data)->gcwq; + + cpu = data >> WORK_STRUCT_FLAG_BITS; + if (cpu == NR_CPUS) + return NULL; + + BUG_ON(cpu >= num_possible_cpus()); + return get_gcwq(cpu); } /** @@ -443,7 +483,7 @@ static void insert_work(struct cpu_workqueue_struct *cwq, unsigned int extra_flags) { /* we own @work, set data and link */ - set_wq_data(work, cwq, extra_flags); + set_work_cwq(work, cwq, extra_flags); /* * Ensure that we get the right work->data if we see the @@ -599,7 +639,7 @@ EXPORT_SYMBOL_GPL(queue_work_on); static void delayed_work_timer_fn(unsigned long __data) { struct delayed_work *dwork = (struct delayed_work *)__data; - struct cpu_workqueue_struct *cwq = get_wq_data(&dwork->work); + struct cpu_workqueue_struct *cwq = get_work_cwq(&dwork->work); __queue_work(smp_processor_id(), cwq->wq, &dwork->work); } @@ -639,13 +679,19 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work = &dwork->work; if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { + struct global_cwq *gcwq = get_work_gcwq(work); + unsigned int lcpu = gcwq ? gcwq->cpu : raw_smp_processor_id(); + BUG_ON(timer_pending(timer)); BUG_ON(!list_empty(&work->entry)); timer_stats_timer_set_start_info(&dwork->timer); - - /* This stores cwq for the moment, for the timer_fn */ - set_wq_data(work, get_cwq(raw_smp_processor_id(), wq), 0); + /* + * This stores cwq for the moment, for the timer_fn. + * Note that the work's gcwq is preserved to allow + * reentrance detection for delayed works. 
+ */ + set_work_cwq(work, get_cwq(lcpu, wq), 0); timer->expires = jiffies + delay; timer->data = (unsigned long)dwork; timer->function = delayed_work_timer_fn; @@ -970,11 +1016,14 @@ static void process_one_work(struct worker *worker, struct work_struct *work) worker->current_work = work; worker->current_cwq = cwq; work_color = get_work_color(work); + + BUG_ON(get_work_cwq(work) != cwq); + /* record the current cpu number in the work data and dequeue */ + set_work_cpu(work, gcwq->cpu); list_del_init(&work->entry); spin_unlock_irq(&gcwq->lock); - BUG_ON(get_wq_data(work) != cwq); work_clear_pending(work); lock_map_acquire(&cwq->wq->lockdep_map); lock_map_acquire(&lockdep_map); @@ -1406,37 +1455,39 @@ EXPORT_SYMBOL_GPL(flush_workqueue); int flush_work(struct work_struct *work) { struct worker *worker = NULL; - struct cpu_workqueue_struct *cwq; struct global_cwq *gcwq; + struct cpu_workqueue_struct *cwq; struct wq_barrier barr; might_sleep(); - cwq = get_wq_data(work); - if (!cwq) + gcwq = get_work_gcwq(work); + if (!gcwq) return 0; - gcwq = cwq->gcwq; - - lock_map_acquire(&cwq->wq->lockdep_map); - lock_map_release(&cwq->wq->lockdep_map); spin_lock_irq(&gcwq->lock); if (!list_empty(&work->entry)) { /* * See the comment near try_to_grab_pending()->smp_rmb(). - * If it was re-queued under us we are not going to wait. + * If it was re-queued to a different gcwq under us, we + * are not going to wait. */ smp_rmb(); - if (unlikely(cwq != get_wq_data(work))) + cwq = get_work_cwq(work); + if (unlikely(!cwq || gcwq != cwq->gcwq)) goto already_gone; } else { - if (cwq->worker && cwq->worker->current_work == work) - worker = cwq->worker; + worker = find_worker_executing_work(gcwq, work); if (!worker) goto already_gone; + cwq = worker->current_cwq; } insert_wq_barrier(cwq, &barr, work, worker); spin_unlock_irq(&gcwq->lock); + + lock_map_acquire(&cwq->wq->lockdep_map); + lock_map_release(&cwq->wq->lockdep_map); + wait_for_completion(&barr.done); destroy_work_on_stack(&barr.work); return 1; @@ -1453,7 +1504,6 @@ EXPORT_SYMBOL_GPL(flush_work); static int try_to_grab_pending(struct work_struct *work) { struct global_cwq *gcwq; - struct cpu_workqueue_struct *cwq; int ret = -1; if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) @@ -1463,24 +1513,23 @@ static int try_to_grab_pending(struct work_struct *work) * The queueing is in progress, or it is already queued. Try to * steal it from ->worklist without clearing WORK_STRUCT_PENDING. */ - - cwq = get_wq_data(work); - if (!cwq) + gcwq = get_work_gcwq(work); + if (!gcwq) return ret; - gcwq = cwq->gcwq; spin_lock_irq(&gcwq->lock); if (!list_empty(&work->entry)) { /* - * This work is queued, but perhaps we locked the wrong cwq. + * This work is queued, but perhaps we locked the wrong gcwq. * In that case we must see the new value after rmb(), see * insert_work()->wmb(). 
*/ smp_rmb(); - if (cwq == get_wq_data(work)) { + if (gcwq == get_work_gcwq(work)) { debug_work_deactivate(work); list_del_init(&work->entry); - cwq_dec_nr_in_flight(cwq, get_work_color(work)); + cwq_dec_nr_in_flight(get_work_cwq(work), + get_work_color(work)); ret = 1; } } @@ -1489,20 +1538,16 @@ static int try_to_grab_pending(struct work_struct *work) return ret; } -static void wait_on_cpu_work(struct cpu_workqueue_struct *cwq, - struct work_struct *work) +static void wait_on_cpu_work(struct global_cwq *gcwq, struct work_struct *work) { - struct global_cwq *gcwq = cwq->gcwq; struct wq_barrier barr; struct worker *worker; spin_lock_irq(&gcwq->lock); - worker = NULL; - if (unlikely(cwq->worker && cwq->worker->current_work == work)) { - worker = cwq->worker; - insert_wq_barrier(cwq, &barr, work, worker); - } + worker = find_worker_executing_work(gcwq, work); + if (unlikely(worker)) + insert_wq_barrier(worker->current_cwq, &barr, work, worker); spin_unlock_irq(&gcwq->lock); @@ -1514,8 +1559,6 @@ static void wait_on_cpu_work(struct cpu_workqueue_struct *cwq, static void wait_on_work(struct work_struct *work) { - struct cpu_workqueue_struct *cwq; - struct workqueue_struct *wq; int cpu; might_sleep(); @@ -1523,14 +1566,8 @@ static void wait_on_work(struct work_struct *work) lock_map_acquire(&work->lockdep_map); lock_map_release(&work->lockdep_map); - cwq = get_wq_data(work); - if (!cwq) - return; - - wq = cwq->wq; - for_each_possible_cpu(cpu) - wait_on_cpu_work(get_cwq(cpu, wq), work); + wait_on_cpu_work(get_gcwq(cpu), work); } static int __cancel_work_timer(struct work_struct *work, @@ -1545,7 +1582,7 @@ static int __cancel_work_timer(struct work_struct *work, wait_on_work(work); } while (unlikely(ret < 0)); - clear_wq_data(work); + clear_work_data(work); return ret; } @@ -1647,7 +1684,7 @@ EXPORT_SYMBOL(schedule_delayed_work); void flush_delayed_work(struct delayed_work *dwork) { if (del_timer_sync(&dwork->timer)) { - __queue_work(get_cpu(), get_wq_data(&dwork->work)->wq, + __queue_work(get_cpu(), get_work_cwq(&dwork->work)->wq, &dwork->work); put_cpu(); } @@ -2405,6 +2442,14 @@ void __init init_workqueues(void) unsigned int cpu; int i; + /* + * The pointer part of work->data is either pointing to the + * cwq or contains the cpu number the work ran last on. Make + * sure cpu number won't overflow into kernel pointer area so + * that they can be distinguished. + */ + BUILD_BUG_ON(NR_CPUS << WORK_STRUCT_FLAG_BITS >= PAGE_OFFSET); + hotcpu_notifier(workqueue_cpu_callback, CPU_PRI_WORKQUEUE); /* initialize gcwqs */ -- cgit v1.2.3 From 18aa9effad4adb2c1efe123af4eb24fec9f59b30 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 29 Jun 2010 10:07:13 +0200 Subject: workqueue: implement WQ_NON_REENTRANT With gcwq managing all the workers and work->data pointing to the last gcwq it was on, non-reentrance can be easily implemented by checking whether the work is still running on the previous gcwq on queueing. Implement it. 
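[Editor's note] As a rough illustration of the check this commit performs at queueing time, here is a small user-space sketch. struct sketch_work, its fields and pick_queue_cpu() are invented names; the real code derives the last gcwq from work->data and looks the running worker up under last_gcwq->lock rather than reading plain fields.

#include <stdbool.h>
#include <stdio.h>

/*
 * Stand-in for the state the patch consults: the cpu (gcwq) a work last
 * ran on and whether a worker there is still executing it.
 */
struct sketch_work {
	int last_cpu;        /* -1 if the work has never run */
	bool still_running;  /* still being executed on last_cpu? */
};

/* Pick the cpu to queue @work on so two cpus never run it concurrently. */
static int pick_queue_cpu(const struct sketch_work *work, int req_cpu)
{
	if (work->last_cpu >= 0 && work->last_cpu != req_cpu &&
	    work->still_running)
		return work->last_cpu;   /* follow the running instance */
	return req_cpu;                  /* not running elsewhere, queue here */
}

int main(void)
{
	struct sketch_work w = { .last_cpu = 1, .still_running = true };

	printf("queue on cpu %d\n", pick_queue_cpu(&w, 3)); /* prints 1 */
	w.still_running = false;
	printf("queue on cpu %d\n", pick_queue_cpu(&w, 3)); /* prints 3 */
	return 0;
}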
Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 1 + kernel/workqueue.c | 32 +++++++++++++++++++++++++++++--- 2 files changed, 30 insertions(+), 3 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 0a7814131e66..07cf5e5f91cb 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -225,6 +225,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } enum { WQ_FREEZEABLE = 1 << 0, /* freeze during suspend */ WQ_SINGLE_CPU = 1 << 1, /* only single cpu at a time */ + WQ_NON_REENTRANT = 1 << 2, /* guarantee non-reentrance */ }; extern struct workqueue_struct * diff --git a/kernel/workqueue.c b/kernel/workqueue.c index c68277c204ab..bce1074bdec1 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -534,11 +534,37 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, debug_work_activate(work); - /* determine gcwq to use */ + /* + * Determine gcwq to use. SINGLE_CPU is inherently + * NON_REENTRANT, so test it first. + */ if (!(wq->flags & WQ_SINGLE_CPU)) { - /* just use the requested cpu for multicpu workqueues */ + struct global_cwq *last_gcwq; + + /* + * It's multi cpu. If @wq is non-reentrant and @work + * was previously on a different cpu, it might still + * be running there, in which case the work needs to + * be queued on that cpu to guarantee non-reentrance. + */ gcwq = get_gcwq(cpu); - spin_lock_irqsave(&gcwq->lock, flags); + if (wq->flags & WQ_NON_REENTRANT && + (last_gcwq = get_work_gcwq(work)) && last_gcwq != gcwq) { + struct worker *worker; + + spin_lock_irqsave(&last_gcwq->lock, flags); + + worker = find_worker_executing_work(last_gcwq, work); + + if (worker && worker->current_cwq->wq == wq) + gcwq = last_gcwq; + else { + /* meh... not running there, queue here */ + spin_unlock_irqrestore(&last_gcwq->lock, flags); + spin_lock_irqsave(&gcwq->lock, flags); + } + } else + spin_lock_irqsave(&gcwq->lock, flags); } else { unsigned int req_cpu = cpu; -- cgit v1.2.3 From e22bee782b3b00bd4534ae9b1c5fb2e8e6573c5c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 29 Jun 2010 10:07:14 +0200 Subject: workqueue: implement concurrency managed dynamic worker pool Instead of creating a worker for each cwq and putting it into the shared pool, manage per-cpu workers dynamically. Works aren't supposed to be cpu cycle hogs and maintaining just enough concurrency to prevent work processing from stalling due to lack of processing context is optimal. gcwq keeps the number of concurrent active workers to minimum but no less. As long as there's one or more running workers on the cpu, no new worker is scheduled so that works can be processed in batch as much as possible but when the last running worker blocks, gcwq immediately schedules new worker so that the cpu doesn't sit idle while there are works to be processed. gcwq always keeps at least single idle worker around. When a new worker is necessary and the worker is the last idle one, the worker assumes the role of "manager" and manages the worker pool - ie. creates another worker. Forward-progress is guaranteed by having dedicated rescue workers for workqueues which may be necessary while creating a new worker. When the manager is having problem creating a new worker, mayday timer activates and rescue workers are summoned to the cpu and execute works which might be necessary to create new workers. Trustee is expanded to serve the role of manager while a CPU is being taken down and stays down. 
As no new works are supposed to be queued on a dead cpu, it just needs to drain all the existing ones. Trustee continues to try to create new workers and summon rescuers as long as there are pending works. If the CPU is brought back up while the trustee is still trying to drain the gcwq from the previous offlining, the trustee will kill all idles ones and tell workers which are still busy to rebind to the cpu, and pass control over to gcwq which assumes the manager role as necessary. Concurrency managed worker pool reduces the number of workers drastically. Only workers which are necessary to keep the processing going are created and kept. Also, it reduces cache footprint by avoiding unnecessarily switching contexts between different workers. Please note that this patch does not increase max_active of any workqueue. All workqueues can still only process one work per cpu. Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 8 +- kernel/workqueue.c | 936 +++++++++++++++++++++++++++++++++++++++++----- kernel/workqueue_sched.h | 13 +- 3 files changed, 841 insertions(+), 116 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 07cf5e5f91cb..b8f4ec45c40a 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -226,6 +226,7 @@ enum { WQ_FREEZEABLE = 1 << 0, /* freeze during suspend */ WQ_SINGLE_CPU = 1 << 1, /* only single cpu at a time */ WQ_NON_REENTRANT = 1 << 2, /* guarantee non-reentrance */ + WQ_RESCUER = 1 << 3, /* has an rescue worker */ }; extern struct workqueue_struct * @@ -252,11 +253,12 @@ __create_workqueue_key(const char *name, unsigned int flags, int max_active, #endif #define create_workqueue(name) \ - __create_workqueue((name), 0, 1) + __create_workqueue((name), WQ_RESCUER, 1) #define create_freezeable_workqueue(name) \ - __create_workqueue((name), WQ_FREEZEABLE | WQ_SINGLE_CPU, 1) + __create_workqueue((name), \ + WQ_FREEZEABLE | WQ_SINGLE_CPU | WQ_RESCUER, 1) #define create_singlethread_workqueue(name) \ - __create_workqueue((name), WQ_SINGLE_CPU, 1) + __create_workqueue((name), WQ_SINGLE_CPU | WQ_RESCUER, 1) extern void destroy_workqueue(struct workqueue_struct *wq); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 4c31fde092c6..0ad46523b423 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -34,17 +34,25 @@ #include #include #include -#include + +#include "workqueue_sched.h" enum { /* global_cwq flags */ + GCWQ_MANAGE_WORKERS = 1 << 0, /* need to manage workers */ + GCWQ_MANAGING_WORKERS = 1 << 1, /* managing workers */ + GCWQ_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */ GCWQ_FREEZING = 1 << 3, /* freeze in progress */ /* worker flags */ WORKER_STARTED = 1 << 0, /* started */ WORKER_DIE = 1 << 1, /* die die die */ WORKER_IDLE = 1 << 2, /* is idle */ + WORKER_PREP = 1 << 3, /* preparing to run works */ WORKER_ROGUE = 1 << 4, /* not bound to any cpu */ + WORKER_REBIND = 1 << 5, /* mom is home, come back */ + + WORKER_NOT_RUNNING = WORKER_PREP | WORKER_ROGUE | WORKER_REBIND, /* gcwq->trustee_state */ TRUSTEE_START = 0, /* start */ @@ -57,7 +65,19 @@ enum { BUSY_WORKER_HASH_SIZE = 1 << BUSY_WORKER_HASH_ORDER, BUSY_WORKER_HASH_MASK = BUSY_WORKER_HASH_SIZE - 1, + MAX_IDLE_WORKERS_RATIO = 4, /* 1/4 of busy can be idle */ + IDLE_WORKER_TIMEOUT = 300 * HZ, /* keep idle ones for 5 mins */ + + MAYDAY_INITIAL_TIMEOUT = HZ / 100, /* call for help after 10ms */ + MAYDAY_INTERVAL = HZ / 10, /* and then every 100ms */ + CREATE_COOLDOWN = HZ, /* time to breath after fail 
*/ TRUSTEE_COOLDOWN = HZ / 10, /* for trustee draining */ + + /* + * Rescue workers are used only on emergencies and shared by + * all cpus. Give -20. + */ + RESCUER_NICE_LEVEL = -20, }; /* @@ -65,8 +85,16 @@ enum { * * I: Set during initialization and read-only afterwards. * + * P: Preemption protected. Disabling preemption is enough and should + * only be modified and accessed from the local cpu. + * * L: gcwq->lock protected. Access with gcwq->lock held. * + * X: During normal operation, modification requires gcwq->lock and + * should be done only from local cpu. Either disabling preemption + * on local cpu or grabbing gcwq->lock is enough for read access. + * While trustee is in charge, it's identical to L. + * * F: wq->flush_mutex protected. * * W: workqueue_lock protected. @@ -74,6 +102,10 @@ enum { struct global_cwq; +/* + * The poor guys doing the actual heavy lifting. All on-duty workers + * are either serving the manager role, on idle list or on busy hash. + */ struct worker { /* on idle list while idle, on busy hash table while busy */ union { @@ -86,12 +118,17 @@ struct worker { struct list_head scheduled; /* L: scheduled works */ struct task_struct *task; /* I: worker task */ struct global_cwq *gcwq; /* I: the associated gcwq */ - unsigned int flags; /* L: flags */ + /* 64 bytes boundary on 64bit, 32 on 32bit */ + unsigned long last_active; /* L: last active timestamp */ + unsigned int flags; /* X: flags */ int id; /* I: worker id */ + struct work_struct rebind_work; /* L: rebind worker to cpu */ }; /* - * Global per-cpu workqueue. + * Global per-cpu workqueue. There's one and only one for each cpu + * and all works are queued and processed here regardless of their + * target workqueues. */ struct global_cwq { spinlock_t lock; /* the gcwq lock */ @@ -103,15 +140,19 @@ struct global_cwq { int nr_idle; /* L: currently idle ones */ /* workers are chained either in the idle_list or busy_hash */ - struct list_head idle_list; /* L: list of idle workers */ + struct list_head idle_list; /* X: list of idle workers */ struct hlist_head busy_hash[BUSY_WORKER_HASH_SIZE]; /* L: hash of busy workers */ + struct timer_list idle_timer; /* L: worker idle timeout */ + struct timer_list mayday_timer; /* L: SOS timer for dworkers */ + struct ida worker_ida; /* L: for worker IDs */ struct task_struct *trustee; /* L: for gcwq shutdown */ unsigned int trustee_state; /* L: trustee state */ wait_queue_head_t trustee_wait; /* trustee wait */ + struct worker *first_idle; /* L: first idle worker */ } ____cacheline_aligned_in_smp; /* @@ -121,7 +162,6 @@ struct global_cwq { */ struct cpu_workqueue_struct { struct global_cwq *gcwq; /* I: the associated gcwq */ - struct worker *worker; struct workqueue_struct *wq; /* I: the owning workqueue */ int work_color; /* L: current color */ int flush_color; /* L: flushing color */ @@ -160,6 +200,9 @@ struct workqueue_struct { unsigned long single_cpu; /* cpu for single cpu wq */ + cpumask_var_t mayday_mask; /* cpus requesting rescue */ + struct worker *rescuer; /* I: rescue worker */ + int saved_max_active; /* I: saved cwq max_active */ const char *name; /* I: workqueue name */ #ifdef CONFIG_LOCKDEP @@ -286,7 +329,13 @@ static DEFINE_SPINLOCK(workqueue_lock); static LIST_HEAD(workqueues); static bool workqueue_freezing; /* W: have wqs started freezing? */ +/* + * The almighty global cpu workqueues. nr_running is the only field + * which is expected to be used frequently by other cpus via + * try_to_wake_up(). Put it in a separate cacheline. 
+ */ static DEFINE_PER_CPU(struct global_cwq, global_cwq); +static DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, gcwq_nr_running); static int worker_thread(void *__worker); @@ -295,6 +344,11 @@ static struct global_cwq *get_gcwq(unsigned int cpu) return &per_cpu(global_cwq, cpu); } +static atomic_t *get_gcwq_nr_running(unsigned int cpu) +{ + return &per_cpu(gcwq_nr_running, cpu); +} + static struct cpu_workqueue_struct *get_cwq(unsigned int cpu, struct workqueue_struct *wq) { @@ -385,6 +439,63 @@ static struct global_cwq *get_work_gcwq(struct work_struct *work) return get_gcwq(cpu); } +/* + * Policy functions. These define the policies on how the global + * worker pool is managed. Unless noted otherwise, these functions + * assume that they're being called with gcwq->lock held. + */ + +/* + * Need to wake up a worker? Called from anything but currently + * running workers. + */ +static bool need_more_worker(struct global_cwq *gcwq) +{ + atomic_t *nr_running = get_gcwq_nr_running(gcwq->cpu); + + return !list_empty(&gcwq->worklist) && !atomic_read(nr_running); +} + +/* Can I start working? Called from busy but !running workers. */ +static bool may_start_working(struct global_cwq *gcwq) +{ + return gcwq->nr_idle; +} + +/* Do I need to keep working? Called from currently running workers. */ +static bool keep_working(struct global_cwq *gcwq) +{ + atomic_t *nr_running = get_gcwq_nr_running(gcwq->cpu); + + return !list_empty(&gcwq->worklist) && atomic_read(nr_running) <= 1; +} + +/* Do we need a new worker? Called from manager. */ +static bool need_to_create_worker(struct global_cwq *gcwq) +{ + return need_more_worker(gcwq) && !may_start_working(gcwq); +} + +/* Do I need to be the manager? */ +static bool need_to_manage_workers(struct global_cwq *gcwq) +{ + return need_to_create_worker(gcwq) || gcwq->flags & GCWQ_MANAGE_WORKERS; +} + +/* Do we have too many workers and should some go away? */ +static bool too_many_workers(struct global_cwq *gcwq) +{ + bool managing = gcwq->flags & GCWQ_MANAGING_WORKERS; + int nr_idle = gcwq->nr_idle + managing; /* manager is considered idle */ + int nr_busy = gcwq->nr_workers - nr_idle; + + return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy; +} + +/* + * Wake up functions. + */ + /* Return the first worker. Safe with preemption disabled */ static struct worker *first_worker(struct global_cwq *gcwq) { @@ -412,12 +523,77 @@ static void wake_up_worker(struct global_cwq *gcwq) } /** - * worker_set_flags - set worker flags + * wq_worker_waking_up - a worker is waking up + * @task: task waking up + * @cpu: CPU @task is waking up to + * + * This function is called during try_to_wake_up() when a worker is + * being awoken. + * + * CONTEXT: + * spin_lock_irq(rq->lock) + */ +void wq_worker_waking_up(struct task_struct *task, unsigned int cpu) +{ + struct worker *worker = kthread_data(task); + + if (likely(!(worker->flags & WORKER_NOT_RUNNING))) + atomic_inc(get_gcwq_nr_running(cpu)); +} + +/** + * wq_worker_sleeping - a worker is going to sleep + * @task: task going to sleep + * @cpu: CPU in question, must be the current CPU number + * + * This function is called during schedule() when a busy worker is + * going to sleep. Worker on the same cpu can be woken up by + * returning pointer to its task. + * + * CONTEXT: + * spin_lock_irq(rq->lock) + * + * RETURNS: + * Worker task on @cpu to wake up, %NULL if none. 
+ */ +struct task_struct *wq_worker_sleeping(struct task_struct *task, + unsigned int cpu) +{ + struct worker *worker = kthread_data(task), *to_wakeup = NULL; + struct global_cwq *gcwq = get_gcwq(cpu); + atomic_t *nr_running = get_gcwq_nr_running(cpu); + + if (unlikely(worker->flags & WORKER_NOT_RUNNING)) + return NULL; + + /* this can only happen on the local cpu */ + BUG_ON(cpu != raw_smp_processor_id()); + + /* + * The counterpart of the following dec_and_test, implied mb, + * worklist not empty test sequence is in insert_work(). + * Please read comment there. + * + * NOT_RUNNING is clear. This means that trustee is not in + * charge and we're running on the local cpu w/ rq lock held + * and preemption disabled, which in turn means that none else + * could be manipulating idle_list, so dereferencing idle_list + * without gcwq lock is safe. + */ + if (atomic_dec_and_test(nr_running) && !list_empty(&gcwq->worklist)) + to_wakeup = first_worker(gcwq); + return to_wakeup ? to_wakeup->task : NULL; +} + +/** + * worker_set_flags - set worker flags and adjust nr_running accordingly * @worker: worker to set flags for * @flags: flags to set * @wakeup: wakeup an idle worker if necessary * - * Set @flags in @worker->flags. + * Set @flags in @worker->flags and adjust nr_running accordingly. If + * nr_running becomes zero and @wakeup is %true, an idle worker is + * woken up. * * LOCKING: * spin_lock_irq(gcwq->lock). @@ -425,22 +601,49 @@ static void wake_up_worker(struct global_cwq *gcwq) static inline void worker_set_flags(struct worker *worker, unsigned int flags, bool wakeup) { + struct global_cwq *gcwq = worker->gcwq; + + /* + * If transitioning into NOT_RUNNING, adjust nr_running and + * wake up an idle worker as necessary if requested by + * @wakeup. + */ + if ((flags & WORKER_NOT_RUNNING) && + !(worker->flags & WORKER_NOT_RUNNING)) { + atomic_t *nr_running = get_gcwq_nr_running(gcwq->cpu); + + if (wakeup) { + if (atomic_dec_and_test(nr_running) && + !list_empty(&gcwq->worklist)) + wake_up_worker(gcwq); + } else + atomic_dec(nr_running); + } + worker->flags |= flags; } /** - * worker_clr_flags - clear worker flags + * worker_clr_flags - clear worker flags and adjust nr_running accordingly * @worker: worker to set flags for * @flags: flags to clear * - * Clear @flags in @worker->flags. + * Clear @flags in @worker->flags and adjust nr_running accordingly. * * LOCKING: * spin_lock_irq(gcwq->lock). */ static inline void worker_clr_flags(struct worker *worker, unsigned int flags) { + struct global_cwq *gcwq = worker->gcwq; + unsigned int oflags = worker->flags; + worker->flags &= ~flags; + + /* if transitioning out of NOT_RUNNING, increment nr_running */ + if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING)) + if (!(worker->flags & WORKER_NOT_RUNNING)) + atomic_inc(get_gcwq_nr_running(gcwq->cpu)); } /** @@ -540,6 +743,8 @@ static void insert_work(struct cpu_workqueue_struct *cwq, struct work_struct *work, struct list_head *head, unsigned int extra_flags) { + struct global_cwq *gcwq = cwq->gcwq; + /* we own @work, set data and link */ set_work_cwq(work, cwq, extra_flags); @@ -550,7 +755,16 @@ static void insert_work(struct cpu_workqueue_struct *cwq, smp_wmb(); list_add_tail(&work->entry, head); - wake_up_worker(cwq->gcwq); + + /* + * Ensure either worker_sched_deactivated() sees the above + * list_add_tail() or we see zero nr_running to avoid workers + * lying around lazily while there are works to be processed. 
+ */ + smp_mb(); + + if (!atomic_read(get_gcwq_nr_running(gcwq->cpu))) + wake_up_worker(gcwq); } /** @@ -810,11 +1024,16 @@ static void worker_enter_idle(struct worker *worker) worker_set_flags(worker, WORKER_IDLE, false); gcwq->nr_idle++; + worker->last_active = jiffies; /* idle_list is LIFO */ list_add(&worker->entry, &gcwq->idle_list); - if (unlikely(worker->flags & WORKER_ROGUE)) + if (likely(!(worker->flags & WORKER_ROGUE))) { + if (too_many_workers(gcwq) && !timer_pending(&gcwq->idle_timer)) + mod_timer(&gcwq->idle_timer, + jiffies + IDLE_WORKER_TIMEOUT); + } else wake_up_all(&gcwq->trustee_wait); } @@ -837,6 +1056,81 @@ static void worker_leave_idle(struct worker *worker) list_del_init(&worker->entry); } +/** + * worker_maybe_bind_and_lock - bind worker to its cpu if possible and lock gcwq + * @worker: self + * + * Works which are scheduled while the cpu is online must at least be + * scheduled to a worker which is bound to the cpu so that if they are + * flushed from cpu callbacks while cpu is going down, they are + * guaranteed to execute on the cpu. + * + * This function is to be used by rogue workers and rescuers to bind + * themselves to the target cpu and may race with cpu going down or + * coming online. kthread_bind() can't be used because it may put the + * worker to already dead cpu and set_cpus_allowed_ptr() can't be used + * verbatim as it's best effort and blocking and gcwq may be + * [dis]associated in the meantime. + * + * This function tries set_cpus_allowed() and locks gcwq and verifies + * the binding against GCWQ_DISASSOCIATED which is set during + * CPU_DYING and cleared during CPU_ONLINE, so if the worker enters + * idle state or fetches works without dropping lock, it can guarantee + * the scheduling requirement described in the first paragraph. + * + * CONTEXT: + * Might sleep. Called without any lock but returns with gcwq->lock + * held. + * + * RETURNS: + * %true if the associated gcwq is online (@worker is successfully + * bound), %false if offline. + */ +static bool worker_maybe_bind_and_lock(struct worker *worker) +{ + struct global_cwq *gcwq = worker->gcwq; + struct task_struct *task = worker->task; + + while (true) { + /* + * The following call may fail, succeed or succeed + * without actually migrating the task to the cpu if + * it races with cpu hotunplug operation. Verify + * against GCWQ_DISASSOCIATED. + */ + set_cpus_allowed_ptr(task, get_cpu_mask(gcwq->cpu)); + + spin_lock_irq(&gcwq->lock); + if (gcwq->flags & GCWQ_DISASSOCIATED) + return false; + if (task_cpu(task) == gcwq->cpu && + cpumask_equal(¤t->cpus_allowed, + get_cpu_mask(gcwq->cpu))) + return true; + spin_unlock_irq(&gcwq->lock); + + /* CPU has come up inbetween, retry migration */ + cpu_relax(); + } +} + +/* + * Function for worker->rebind_work used to rebind rogue busy workers + * to the associated cpu which is coming back online. This is + * scheduled by cpu up but can race with other cpu hotplug operations + * and may be executed twice without intervening cpu down. 
+ */ +static void worker_rebind_fn(struct work_struct *work) +{ + struct worker *worker = container_of(work, struct worker, rebind_work); + struct global_cwq *gcwq = worker->gcwq; + + if (worker_maybe_bind_and_lock(worker)) + worker_clr_flags(worker, WORKER_REBIND); + + spin_unlock_irq(&gcwq->lock); +} + static struct worker *alloc_worker(void) { struct worker *worker; @@ -845,6 +1139,9 @@ static struct worker *alloc_worker(void) if (worker) { INIT_LIST_HEAD(&worker->entry); INIT_LIST_HEAD(&worker->scheduled); + INIT_WORK(&worker->rebind_work, worker_rebind_fn); + /* on creation a worker is in !idle && prep state */ + worker->flags = WORKER_PREP; } return worker; } @@ -963,6 +1260,220 @@ static void destroy_worker(struct worker *worker) ida_remove(&gcwq->worker_ida, id); } +static void idle_worker_timeout(unsigned long __gcwq) +{ + struct global_cwq *gcwq = (void *)__gcwq; + + spin_lock_irq(&gcwq->lock); + + if (too_many_workers(gcwq)) { + struct worker *worker; + unsigned long expires; + + /* idle_list is kept in LIFO order, check the last one */ + worker = list_entry(gcwq->idle_list.prev, struct worker, entry); + expires = worker->last_active + IDLE_WORKER_TIMEOUT; + + if (time_before(jiffies, expires)) + mod_timer(&gcwq->idle_timer, expires); + else { + /* it's been idle for too long, wake up manager */ + gcwq->flags |= GCWQ_MANAGE_WORKERS; + wake_up_worker(gcwq); + } + } + + spin_unlock_irq(&gcwq->lock); +} + +static bool send_mayday(struct work_struct *work) +{ + struct cpu_workqueue_struct *cwq = get_work_cwq(work); + struct workqueue_struct *wq = cwq->wq; + + if (!(wq->flags & WQ_RESCUER)) + return false; + + /* mayday mayday mayday */ + if (!cpumask_test_and_set_cpu(cwq->gcwq->cpu, wq->mayday_mask)) + wake_up_process(wq->rescuer->task); + return true; +} + +static void gcwq_mayday_timeout(unsigned long __gcwq) +{ + struct global_cwq *gcwq = (void *)__gcwq; + struct work_struct *work; + + spin_lock_irq(&gcwq->lock); + + if (need_to_create_worker(gcwq)) { + /* + * We've been trying to create a new worker but + * haven't been successful. We might be hitting an + * allocation deadlock. Send distress signals to + * rescuers. + */ + list_for_each_entry(work, &gcwq->worklist, entry) + send_mayday(work); + } + + spin_unlock_irq(&gcwq->lock); + + mod_timer(&gcwq->mayday_timer, jiffies + MAYDAY_INTERVAL); +} + +/** + * maybe_create_worker - create a new worker if necessary + * @gcwq: gcwq to create a new worker for + * + * Create a new worker for @gcwq if necessary. @gcwq is guaranteed to + * have at least one idle worker on return from this function. If + * creating a new worker takes longer than MAYDAY_INTERVAL, mayday is + * sent to all rescuers with works scheduled on @gcwq to resolve + * possible allocation deadlock. + * + * On return, need_to_create_worker() is guaranteed to be false and + * may_start_working() true. + * + * LOCKING: + * spin_lock_irq(gcwq->lock) which may be released and regrabbed + * multiple times. Does GFP_KERNEL allocations. Called only from + * manager. + * + * RETURNS: + * false if no action was taken and gcwq->lock stayed locked, true + * otherwise. 
+ */ +static bool maybe_create_worker(struct global_cwq *gcwq) +{ + if (!need_to_create_worker(gcwq)) + return false; +restart: + /* if we don't make progress in MAYDAY_INITIAL_TIMEOUT, call for help */ + mod_timer(&gcwq->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT); + + while (true) { + struct worker *worker; + + spin_unlock_irq(&gcwq->lock); + + worker = create_worker(gcwq, true); + if (worker) { + del_timer_sync(&gcwq->mayday_timer); + spin_lock_irq(&gcwq->lock); + start_worker(worker); + BUG_ON(need_to_create_worker(gcwq)); + return true; + } + + if (!need_to_create_worker(gcwq)) + break; + + spin_unlock_irq(&gcwq->lock); + __set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(CREATE_COOLDOWN); + spin_lock_irq(&gcwq->lock); + if (!need_to_create_worker(gcwq)) + break; + } + + spin_unlock_irq(&gcwq->lock); + del_timer_sync(&gcwq->mayday_timer); + spin_lock_irq(&gcwq->lock); + if (need_to_create_worker(gcwq)) + goto restart; + return true; +} + +/** + * maybe_destroy_worker - destroy workers which have been idle for a while + * @gcwq: gcwq to destroy workers for + * + * Destroy @gcwq workers which have been idle for longer than + * IDLE_WORKER_TIMEOUT. + * + * LOCKING: + * spin_lock_irq(gcwq->lock) which may be released and regrabbed + * multiple times. Called only from manager. + * + * RETURNS: + * false if no action was taken and gcwq->lock stayed locked, true + * otherwise. + */ +static bool maybe_destroy_workers(struct global_cwq *gcwq) +{ + bool ret = false; + + while (too_many_workers(gcwq)) { + struct worker *worker; + unsigned long expires; + + worker = list_entry(gcwq->idle_list.prev, struct worker, entry); + expires = worker->last_active + IDLE_WORKER_TIMEOUT; + + if (time_before(jiffies, expires)) { + mod_timer(&gcwq->idle_timer, expires); + break; + } + + destroy_worker(worker); + ret = true; + } + + return ret; +} + +/** + * manage_workers - manage worker pool + * @worker: self + * + * Assume the manager role and manage gcwq worker pool @worker belongs + * to. At any given time, there can be only zero or one manager per + * gcwq. The exclusion is handled automatically by this function. + * + * The caller can safely start processing works on false return. On + * true return, it's guaranteed that need_to_create_worker() is false + * and may_start_working() is true. + * + * CONTEXT: + * spin_lock_irq(gcwq->lock) which may be released and regrabbed + * multiple times. Does GFP_KERNEL allocations. + * + * RETURNS: + * false if no action was taken and gcwq->lock stayed locked, true if + * some action was taken. + */ +static bool manage_workers(struct worker *worker) +{ + struct global_cwq *gcwq = worker->gcwq; + bool ret = false; + + if (gcwq->flags & GCWQ_MANAGING_WORKERS) + return ret; + + gcwq->flags &= ~GCWQ_MANAGE_WORKERS; + gcwq->flags |= GCWQ_MANAGING_WORKERS; + + /* + * Destroy and then create so that may_start_working() is true + * on return. + */ + ret |= maybe_destroy_workers(gcwq); + ret |= maybe_create_worker(gcwq); + + gcwq->flags &= ~GCWQ_MANAGING_WORKERS; + + /* + * The trustee might be waiting to take over the manager + * position, tell it we're done. + */ + if (unlikely(gcwq->trustee)) + wake_up_all(&gcwq->trustee_wait); + + return ret; +} + /** * move_linked_works - move linked works to a list * @work: start of series of works to be scheduled @@ -1169,24 +1680,39 @@ static void process_scheduled_works(struct worker *worker) * worker_thread - the worker thread function * @__worker: self * - * The cwq worker thread function. 
+ * The gcwq worker thread function. There's a single dynamic pool of + * these per each cpu. These workers process all works regardless of + * their specific target workqueue. The only exception is works which + * belong to workqueues with a rescuer which will be explained in + * rescuer_thread(). */ static int worker_thread(void *__worker) { struct worker *worker = __worker; struct global_cwq *gcwq = worker->gcwq; + /* tell the scheduler that this is a workqueue worker */ + worker->task->flags |= PF_WQ_WORKER; woke_up: spin_lock_irq(&gcwq->lock); /* DIE can be set only while we're idle, checking here is enough */ if (worker->flags & WORKER_DIE) { spin_unlock_irq(&gcwq->lock); + worker->task->flags &= ~PF_WQ_WORKER; return 0; } worker_leave_idle(worker); recheck: + /* no more worker necessary? */ + if (!need_more_worker(gcwq)) + goto sleep; + + /* do we need to manage? */ + if (unlikely(!may_start_working(gcwq)) && manage_workers(worker)) + goto recheck; + /* * ->scheduled list can only be filled while a worker is * preparing to process a work or actually processing it. @@ -1194,27 +1720,18 @@ recheck: */ BUG_ON(!list_empty(&worker->scheduled)); - while (!list_empty(&gcwq->worklist)) { + /* + * When control reaches this point, we're guaranteed to have + * at least one idle worker or that someone else has already + * assumed the manager role. + */ + worker_clr_flags(worker, WORKER_PREP); + + do { struct work_struct *work = list_first_entry(&gcwq->worklist, struct work_struct, entry); - /* - * The following is a rather inefficient way to close - * race window against cpu hotplug operations. Will - * be replaced soon. - */ - if (unlikely(!(worker->flags & WORKER_ROGUE) && - !cpumask_equal(&worker->task->cpus_allowed, - get_cpu_mask(gcwq->cpu)))) { - spin_unlock_irq(&gcwq->lock); - set_cpus_allowed_ptr(worker->task, - get_cpu_mask(gcwq->cpu)); - cpu_relax(); - spin_lock_irq(&gcwq->lock); - goto recheck; - } - if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) { /* optimization path, not strictly necessary */ process_one_work(worker, work); @@ -1224,13 +1741,19 @@ recheck: move_linked_works(work, &worker->scheduled, NULL); process_scheduled_works(worker); } - } + } while (keep_working(gcwq)); + + worker_set_flags(worker, WORKER_PREP, false); + if (unlikely(need_to_manage_workers(gcwq)) && manage_workers(worker)) + goto recheck; +sleep: /* - * gcwq->lock is held and there's no work to process, sleep. - * Workers are woken up only while holding gcwq->lock, so - * setting the current state before releasing gcwq->lock is - * enough to prevent losing any event. + * gcwq->lock is held and there's no work to process and no + * need to manage, sleep. Workers are woken up only while + * holding gcwq->lock or from local cpu, so setting the + * current state before releasing gcwq->lock is enough to + * prevent losing any event. */ worker_enter_idle(worker); __set_current_state(TASK_INTERRUPTIBLE); @@ -1239,6 +1762,68 @@ recheck: goto woke_up; } +/** + * rescuer_thread - the rescuer thread function + * @__wq: the associated workqueue + * + * Workqueue rescuer thread function. There's one rescuer for each + * workqueue which has WQ_RESCUER set. + * + * Regular work processing on a gcwq may block trying to create a new + * worker which uses GFP_KERNEL allocation which has slight chance of + * developing into deadlock if some works currently on the same queue + * need to be processed to satisfy the GFP_KERNEL allocation. This is + * the problem rescuer solves. 
+ * + * When such condition is possible, the gcwq summons rescuers of all + * workqueues which have works queued on the gcwq and let them process + * those works so that forward progress can be guaranteed. + * + * This should happen rarely. + */ +static int rescuer_thread(void *__wq) +{ + struct workqueue_struct *wq = __wq; + struct worker *rescuer = wq->rescuer; + struct list_head *scheduled = &rescuer->scheduled; + unsigned int cpu; + + set_user_nice(current, RESCUER_NICE_LEVEL); +repeat: + set_current_state(TASK_INTERRUPTIBLE); + + if (kthread_should_stop()) + return 0; + + for_each_cpu(cpu, wq->mayday_mask) { + struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); + struct global_cwq *gcwq = cwq->gcwq; + struct work_struct *work, *n; + + __set_current_state(TASK_RUNNING); + cpumask_clear_cpu(cpu, wq->mayday_mask); + + /* migrate to the target cpu if possible */ + rescuer->gcwq = gcwq; + worker_maybe_bind_and_lock(rescuer); + + /* + * Slurp in all works issued via this workqueue and + * process'em. + */ + BUG_ON(!list_empty(&rescuer->scheduled)); + list_for_each_entry_safe(work, n, &gcwq->worklist, entry) + if (get_work_cwq(work) == cwq) + move_linked_works(work, scheduled, &n); + + process_scheduled_works(rescuer); + spin_unlock_irq(&gcwq->lock); + } + + schedule(); + goto repeat; +} + struct wq_barrier { struct work_struct work; struct completion done; @@ -1998,7 +2583,6 @@ struct workqueue_struct *__create_workqueue_key(const char *name, const char *lock_name) { struct workqueue_struct *wq; - bool failed = false; unsigned int cpu; max_active = clamp_val(max_active, 1, INT_MAX); @@ -2023,13 +2607,6 @@ struct workqueue_struct *__create_workqueue_key(const char *name, lockdep_init_map(&wq->lockdep_map, lock_name, key, 0); INIT_LIST_HEAD(&wq->list); - cpu_maps_update_begin(); - /* - * We must initialize cwqs for each possible cpu even if we - * are going to call destroy_workqueue() finally. Otherwise - * cpu_up() can hit the uninitialized cwq once we drop the - * lock. - */ for_each_possible_cpu(cpu) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); struct global_cwq *gcwq = get_gcwq(cpu); @@ -2040,14 +2617,25 @@ struct workqueue_struct *__create_workqueue_key(const char *name, cwq->flush_color = -1; cwq->max_active = max_active; INIT_LIST_HEAD(&cwq->delayed_works); + } - if (failed) - continue; - cwq->worker = create_worker(gcwq, cpu_online(cpu)); - if (cwq->worker) - start_worker(cwq->worker); - else - failed = true; + if (flags & WQ_RESCUER) { + struct worker *rescuer; + + if (!alloc_cpumask_var(&wq->mayday_mask, GFP_KERNEL)) + goto err; + + wq->rescuer = rescuer = alloc_worker(); + if (!rescuer) + goto err; + + rescuer->task = kthread_create(rescuer_thread, wq, "%s", name); + if (IS_ERR(rescuer->task)) + goto err; + + wq->rescuer = rescuer; + rescuer->task->flags |= PF_THREAD_BOUND; + wake_up_process(rescuer->task); } /* @@ -2065,16 +2653,12 @@ struct workqueue_struct *__create_workqueue_key(const char *name, spin_unlock(&workqueue_lock); - cpu_maps_update_done(); - - if (failed) { - destroy_workqueue(wq); - wq = NULL; - } return wq; err: if (wq) { free_cwqs(wq->cpu_wq); + free_cpumask_var(wq->mayday_mask); + kfree(wq->rescuer); kfree(wq); } return NULL; @@ -2097,42 +2681,26 @@ void destroy_workqueue(struct workqueue_struct *wq) * wq list is used to freeze wq, remove from list after * flushing is complete in case freeze races us. 
*/ - cpu_maps_update_begin(); spin_lock(&workqueue_lock); list_del(&wq->list); spin_unlock(&workqueue_lock); - cpu_maps_update_done(); + /* sanity check */ for_each_possible_cpu(cpu) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); - struct global_cwq *gcwq = cwq->gcwq; int i; - if (cwq->worker) { - retry: - spin_lock_irq(&gcwq->lock); - /* - * Worker can only be destroyed while idle. - * Wait till it becomes idle. This is ugly - * and prone to starvation. It will go away - * once dynamic worker pool is implemented. - */ - if (!(cwq->worker->flags & WORKER_IDLE)) { - spin_unlock_irq(&gcwq->lock); - msleep(100); - goto retry; - } - destroy_worker(cwq->worker); - cwq->worker = NULL; - spin_unlock_irq(&gcwq->lock); - } - for (i = 0; i < WORK_NR_COLORS; i++) BUG_ON(cwq->nr_in_flight[i]); BUG_ON(cwq->nr_active); BUG_ON(!list_empty(&cwq->delayed_works)); } + if (wq->flags & WQ_RESCUER) { + kthread_stop(wq->rescuer->task); + free_cpumask_var(wq->mayday_mask); + } + free_cwqs(wq->cpu_wq); kfree(wq); } @@ -2141,10 +2709,18 @@ EXPORT_SYMBOL_GPL(destroy_workqueue); /* * CPU hotplug. * - * CPU hotplug is implemented by allowing cwqs to be detached from - * CPU, running with unbound workers and allowing them to be - * reattached later if the cpu comes back online. A separate thread - * is created to govern cwqs in such state and is called the trustee. + * There are two challenges in supporting CPU hotplug. Firstly, there + * are a lot of assumptions on strong associations among work, cwq and + * gcwq which make migrating pending and scheduled works very + * difficult to implement without impacting hot paths. Secondly, + * gcwqs serve mix of short, long and very long running works making + * blocked draining impractical. + * + * This is solved by allowing a gcwq to be detached from CPU, running + * it with unbound (rogue) workers and allowing it to be reattached + * later if the cpu comes back online. A separate thread is created + * to govern a gcwq in such state and is called the trustee of the + * gcwq. * * Trustee states and their descriptions. * @@ -2152,11 +2728,12 @@ EXPORT_SYMBOL_GPL(destroy_workqueue); * new trustee is started with this state. * * IN_CHARGE Once started, trustee will enter this state after - * making all existing workers rogue. DOWN_PREPARE waits - * for trustee to enter this state. After reaching - * IN_CHARGE, trustee tries to execute the pending - * worklist until it's empty and the state is set to - * BUTCHER, or the state is set to RELEASE. + * assuming the manager role and making all existing + * workers rogue. DOWN_PREPARE waits for trustee to + * enter this state. After reaching IN_CHARGE, trustee + * tries to execute the pending worklist until it's empty + * and the state is set to BUTCHER, or the state is set + * to RELEASE. * * BUTCHER Command state which is set by the cpu callback after * the cpu has went down. Once this state is set trustee @@ -2167,7 +2744,9 @@ EXPORT_SYMBOL_GPL(destroy_workqueue); * RELEASE Command state which is set by the cpu callback if the * cpu down has been canceled or it has come online * again. After recognizing this state, trustee stops - * trying to drain or butcher and transits to DONE. + * trying to drain or butcher and clears ROGUE, rebinds + * all remaining workers back to the cpu and releases + * manager role. * * DONE Trustee will enter this state after BUTCHER or RELEASE * is complete. 
@@ -2233,17 +2812,24 @@ static int __cpuinit trustee_thread(void *__gcwq) { struct global_cwq *gcwq = __gcwq; struct worker *worker; + struct work_struct *work; struct hlist_node *pos; + long rc; int i; BUG_ON(gcwq->cpu != smp_processor_id()); spin_lock_irq(&gcwq->lock); /* - * Make all workers rogue. Trustee must be bound to the - * target cpu and can't be cancelled. + * Claim the manager position and make all workers rogue. + * Trustee must be bound to the target cpu and can't be + * cancelled. */ BUG_ON(gcwq->cpu != smp_processor_id()); + rc = trustee_wait_event(!(gcwq->flags & GCWQ_MANAGING_WORKERS)); + BUG_ON(rc < 0); + + gcwq->flags |= GCWQ_MANAGING_WORKERS; list_for_each_entry(worker, &gcwq->idle_list, entry) worker_set_flags(worker, WORKER_ROGUE, false); @@ -2251,6 +2837,28 @@ static int __cpuinit trustee_thread(void *__gcwq) for_each_busy_worker(worker, i, pos, gcwq) worker_set_flags(worker, WORKER_ROGUE, false); + /* + * Call schedule() so that we cross rq->lock and thus can + * guarantee sched callbacks see the rogue flag. This is + * necessary as scheduler callbacks may be invoked from other + * cpus. + */ + spin_unlock_irq(&gcwq->lock); + schedule(); + spin_lock_irq(&gcwq->lock); + + /* + * Sched callbacks are disabled now. gcwq->nr_running should + * be zero and will stay that way, making need_more_worker() + * and keep_working() always return true as long as the + * worklist is not empty. + */ + WARN_ON_ONCE(atomic_read(get_gcwq_nr_running(gcwq->cpu)) != 0); + + spin_unlock_irq(&gcwq->lock); + del_timer_sync(&gcwq->idle_timer); + spin_lock_irq(&gcwq->lock); + /* * We're now in charge. Notify and proceed to drain. We need * to keep the gcwq running during the whole CPU down @@ -2263,18 +2871,90 @@ static int __cpuinit trustee_thread(void *__gcwq) /* * The original cpu is in the process of dying and may go away * anytime now. When that happens, we and all workers would - * be migrated to other cpus. Try draining any left work. - * Note that if the gcwq is frozen, there may be frozen works - * in freezeable cwqs. Don't declare completion while frozen. + * be migrated to other cpus. Try draining any left work. We + * want to get it over with ASAP - spam rescuers, wake up as + * many idlers as necessary and create new ones till the + * worklist is empty. Note that if the gcwq is frozen, there + * may be frozen works in freezeable cwqs. Don't declare + * completion while frozen. */ while (gcwq->nr_workers != gcwq->nr_idle || gcwq->flags & GCWQ_FREEZING || gcwq->trustee_state == TRUSTEE_IN_CHARGE) { + int nr_works = 0; + + list_for_each_entry(work, &gcwq->worklist, entry) { + send_mayday(work); + nr_works++; + } + + list_for_each_entry(worker, &gcwq->idle_list, entry) { + if (!nr_works--) + break; + wake_up_process(worker->task); + } + + if (need_to_create_worker(gcwq)) { + spin_unlock_irq(&gcwq->lock); + worker = create_worker(gcwq, false); + spin_lock_irq(&gcwq->lock); + if (worker) { + worker_set_flags(worker, WORKER_ROGUE, false); + start_worker(worker); + } + } + /* give a breather */ if (trustee_wait_event_timeout(false, TRUSTEE_COOLDOWN) < 0) break; } + /* + * Either all works have been scheduled and cpu is down, or + * cpu down has already been canceled. Wait for and butcher + * all workers till we're canceled. 
+ */ + do { + rc = trustee_wait_event(!list_empty(&gcwq->idle_list)); + while (!list_empty(&gcwq->idle_list)) + destroy_worker(list_first_entry(&gcwq->idle_list, + struct worker, entry)); + } while (gcwq->nr_workers && rc >= 0); + + /* + * At this point, either draining has completed and no worker + * is left, or cpu down has been canceled or the cpu is being + * brought back up. There shouldn't be any idle one left. + * Tell the remaining busy ones to rebind once it finishes the + * currently scheduled works by scheduling the rebind_work. + */ + WARN_ON(!list_empty(&gcwq->idle_list)); + + for_each_busy_worker(worker, i, pos, gcwq) { + struct work_struct *rebind_work = &worker->rebind_work; + + /* + * Rebind_work may race with future cpu hotplug + * operations. Use a separate flag to mark that + * rebinding is scheduled. + */ + worker_set_flags(worker, WORKER_REBIND, false); + worker_clr_flags(worker, WORKER_ROGUE); + + /* queue rebind_work, wq doesn't matter, use the default one */ + if (test_and_set_bit(WORK_STRUCT_PENDING_BIT, + work_data_bits(rebind_work))) + continue; + + debug_work_activate(rebind_work); + insert_work(get_cwq(gcwq->cpu, keventd_wq), rebind_work, + worker->scheduled.next, + work_color_to_flags(WORK_NO_COLOR)); + } + + /* relinquish manager role */ + gcwq->flags &= ~GCWQ_MANAGING_WORKERS; + /* notify completion */ gcwq->trustee = NULL; gcwq->trustee_state = TRUSTEE_DONE; @@ -2313,10 +2993,8 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, unsigned int cpu = (unsigned long)hcpu; struct global_cwq *gcwq = get_gcwq(cpu); struct task_struct *new_trustee = NULL; - struct worker *worker; - struct hlist_node *pos; + struct worker *uninitialized_var(new_worker); unsigned long flags; - int i; action &= ~CPU_TASKS_FROZEN; @@ -2327,6 +3005,15 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, if (IS_ERR(new_trustee)) return notifier_from_errno(PTR_ERR(new_trustee)); kthread_bind(new_trustee, cpu); + /* fall through */ + case CPU_UP_PREPARE: + BUG_ON(gcwq->first_idle); + new_worker = create_worker(gcwq, false); + if (!new_worker) { + if (new_trustee) + kthread_stop(new_trustee); + return NOTIFY_BAD; + } } /* some are called w/ irq disabled, don't disturb irq status */ @@ -2340,26 +3027,50 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, gcwq->trustee_state = TRUSTEE_START; wake_up_process(gcwq->trustee); wait_trustee_state(gcwq, TRUSTEE_IN_CHARGE); + /* fall through */ + case CPU_UP_PREPARE: + BUG_ON(gcwq->first_idle); + gcwq->first_idle = new_worker; + break; + + case CPU_DYING: + /* + * Before this, the trustee and all workers except for + * the ones which are still executing works from + * before the last CPU down must be on the cpu. After + * this, they'll all be diasporas. + */ + gcwq->flags |= GCWQ_DISASSOCIATED; break; case CPU_POST_DEAD: gcwq->trustee_state = TRUSTEE_BUTCHER; + /* fall through */ + case CPU_UP_CANCELED: + destroy_worker(gcwq->first_idle); + gcwq->first_idle = NULL; break; case CPU_DOWN_FAILED: case CPU_ONLINE: + gcwq->flags &= ~GCWQ_DISASSOCIATED; if (gcwq->trustee_state != TRUSTEE_DONE) { gcwq->trustee_state = TRUSTEE_RELEASE; wake_up_process(gcwq->trustee); wait_trustee_state(gcwq, TRUSTEE_DONE); } - /* clear ROGUE from all workers */ - list_for_each_entry(worker, &gcwq->idle_list, entry) - worker_clr_flags(worker, WORKER_ROGUE); - - for_each_busy_worker(worker, i, pos, gcwq) - worker_clr_flags(worker, WORKER_ROGUE); + /* + * Trustee is done and there might be no worker left. 
+ * Put the first_idle in and request a real manager to + * take a look. + */ + spin_unlock_irq(&gcwq->lock); + kthread_bind(gcwq->first_idle->task, cpu); + spin_lock_irq(&gcwq->lock); + gcwq->flags |= GCWQ_MANAGE_WORKERS; + start_worker(gcwq->first_idle); + gcwq->first_idle = NULL; break; } @@ -2548,10 +3259,10 @@ void thaw_workqueues(void) if (wq->single_cpu == gcwq->cpu && !cwq->nr_active && list_empty(&cwq->delayed_works)) cwq_unbind_single_cpu(cwq); - - wake_up_process(cwq->worker->task); } + wake_up_worker(gcwq); + spin_unlock_irq(&gcwq->lock); } @@ -2588,12 +3299,31 @@ void __init init_workqueues(void) for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++) INIT_HLIST_HEAD(&gcwq->busy_hash[i]); + init_timer_deferrable(&gcwq->idle_timer); + gcwq->idle_timer.function = idle_worker_timeout; + gcwq->idle_timer.data = (unsigned long)gcwq; + + setup_timer(&gcwq->mayday_timer, gcwq_mayday_timeout, + (unsigned long)gcwq); + ida_init(&gcwq->worker_ida); gcwq->trustee_state = TRUSTEE_DONE; init_waitqueue_head(&gcwq->trustee_wait); } + /* create the initial worker */ + for_each_online_cpu(cpu) { + struct global_cwq *gcwq = get_gcwq(cpu); + struct worker *worker; + + worker = create_worker(gcwq, true); + BUG_ON(!worker); + spin_lock_irq(&gcwq->lock); + start_worker(worker); + spin_unlock_irq(&gcwq->lock); + } + keventd_wq = create_workqueue("events"); BUG_ON(!keventd_wq); } diff --git a/kernel/workqueue_sched.h b/kernel/workqueue_sched.h index af040babb742..2d10fc98dc79 100644 --- a/kernel/workqueue_sched.h +++ b/kernel/workqueue_sched.h @@ -4,13 +4,6 @@ * Scheduler hooks for concurrency managed workqueue. Only to be * included from sched.c and workqueue.c. */ -static inline void wq_worker_waking_up(struct task_struct *task, - unsigned int cpu) -{ -} - -static inline struct task_struct *wq_worker_sleeping(struct task_struct *task, - unsigned int cpu) -{ - return NULL; -} +void wq_worker_waking_up(struct task_struct *task, unsigned int cpu); +struct task_struct *wq_worker_sleeping(struct task_struct *task, + unsigned int cpu); -- cgit v1.2.3 From b71ab8c2025caef8db719aa41af0ed735dc543cd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 29 Jun 2010 10:07:14 +0200 Subject: workqueue: increase max_active of keventd and kill current_is_keventd() Define WQ_MAX_ACTIVE and create keventd with max_active set to half of it which means that keventd now can process upto WQ_MAX_ACTIVE / 2 - 1 works concurrently. Unless some combination can result in dependency loop longer than max_active, deadlock won't happen and thus it's unnecessary to check whether current_is_keventd() before trying to schedule a work. Kill current_is_keventd(). (Lockdep annotations are broken. We need lock_map_acquire_read_norecurse()) Signed-off-by: Tejun Heo Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Christoph Lameter Cc: Tony Luck Cc: Andi Kleen Cc: Oleg Nesterov --- arch/ia64/kernel/smpboot.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- include/linux/workqueue.h | 4 ++- kernel/workqueue.c | 63 ++++++++++------------------------------------ 4 files changed, 18 insertions(+), 53 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 6a1380e90f87..99dcc85193c9 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -519,7 +519,7 @@ do_boot_cpu (int sapicid, int cpu) /* * We can't use kernel_thread since we must avoid to reschedule the child. 
*/ - if (!keventd_up() || current_is_keventd()) + if (!keventd_up()) c_idle.work.func(&c_idle.work); else { schedule_work(&c_idle.work); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c4f33b2e77d6..4d90f376e985 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -735,7 +735,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) goto do_rest; } - if (!keventd_up() || current_is_keventd()) + if (!keventd_up()) c_idle.work.func(&c_idle.work); else { schedule_work(&c_idle.work); diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index b8f4ec45c40a..33e24e734d50 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -227,6 +227,9 @@ enum { WQ_SINGLE_CPU = 1 << 1, /* only single cpu at a time */ WQ_NON_REENTRANT = 1 << 2, /* guarantee non-reentrance */ WQ_RESCUER = 1 << 3, /* has an rescue worker */ + + WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ + WQ_DFL_ACTIVE = WQ_MAX_ACTIVE / 2, }; extern struct workqueue_struct * @@ -280,7 +283,6 @@ extern int schedule_delayed_work(struct delayed_work *work, unsigned long delay) extern int schedule_delayed_work_on(int cpu, struct delayed_work *work, unsigned long delay); extern int schedule_on_each_cpu(work_func_t func); -extern int current_is_keventd(void); extern int keventd_up(void); extern void init_workqueues(void); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 0ad46523b423..4190e84cf995 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2398,7 +2398,6 @@ EXPORT_SYMBOL(schedule_delayed_work_on); int schedule_on_each_cpu(work_func_t func) { int cpu; - int orig = -1; struct work_struct *works; works = alloc_percpu(struct work_struct); @@ -2407,23 +2406,12 @@ int schedule_on_each_cpu(work_func_t func) get_online_cpus(); - /* - * When running in keventd don't schedule a work item on - * itself. Can just call directly because the work queue is - * already bound. This also is faster. - */ - if (current_is_keventd()) - orig = raw_smp_processor_id(); - for_each_online_cpu(cpu) { struct work_struct *work = per_cpu_ptr(works, cpu); INIT_WORK(work, func); - if (cpu != orig) - schedule_work_on(cpu, work); + schedule_work_on(cpu, work); } - if (orig >= 0) - func(per_cpu_ptr(works, orig)); for_each_online_cpu(cpu) flush_work(per_cpu_ptr(works, cpu)); @@ -2494,41 +2482,6 @@ int keventd_up(void) return keventd_wq != NULL; } -int current_is_keventd(void) -{ - bool found = false; - unsigned int cpu; - - /* - * There no longer is one-to-one relation between worker and - * work queue and a worker task might be unbound from its cpu - * if the cpu was offlined. Match all busy workers. This - * function will go away once dynamic pool is implemented. 
- */ - for_each_possible_cpu(cpu) { - struct global_cwq *gcwq = get_gcwq(cpu); - struct worker *worker; - struct hlist_node *pos; - unsigned long flags; - int i; - - spin_lock_irqsave(&gcwq->lock, flags); - - for_each_busy_worker(worker, i, pos, gcwq) { - if (worker->task == current) { - found = true; - break; - } - } - - spin_unlock_irqrestore(&gcwq->lock, flags); - if (found) - break; - } - - return found; -} - static struct cpu_workqueue_struct *alloc_cwqs(void) { /* @@ -2576,6 +2529,16 @@ static void free_cwqs(struct cpu_workqueue_struct *cwqs) #endif } +static int wq_clamp_max_active(int max_active, const char *name) +{ + if (max_active < 1 || max_active > WQ_MAX_ACTIVE) + printk(KERN_WARNING "workqueue: max_active %d requested for %s " + "is out of range, clamping between %d and %d\n", + max_active, name, 1, WQ_MAX_ACTIVE); + + return clamp_val(max_active, 1, WQ_MAX_ACTIVE); +} + struct workqueue_struct *__create_workqueue_key(const char *name, unsigned int flags, int max_active, @@ -2585,7 +2548,7 @@ struct workqueue_struct *__create_workqueue_key(const char *name, struct workqueue_struct *wq; unsigned int cpu; - max_active = clamp_val(max_active, 1, INT_MAX); + max_active = wq_clamp_max_active(max_active, name); wq = kzalloc(sizeof(*wq), GFP_KERNEL); if (!wq) @@ -3324,6 +3287,6 @@ void __init init_workqueues(void) spin_unlock_irq(&gcwq->lock); } - keventd_wq = create_workqueue("events"); + keventd_wq = __create_workqueue("events", 0, WQ_DFL_ACTIVE); BUG_ON(!keventd_wq); } -- cgit v1.2.3 From d320c03830b17af64e4547075003b1eeb274bc6c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 29 Jun 2010 10:07:14 +0200 Subject: workqueue: s/__create_workqueue()/alloc_workqueue()/, and add system workqueues This patch makes changes to make new workqueue features available to its users. * Now that workqueue is more featureful, there should be a public workqueue creation function which takes paramters to control them. Rename __create_workqueue() to alloc_workqueue() and make 0 max_active mean WQ_DFL_ACTIVE. In the long run, all create_workqueue_*() will be converted over to alloc_workqueue(). * To further unify access interface, rename keventd_wq to system_wq and export it. * Add system_long_wq and system_nrt_wq. The former is to host long running works separately (so that flush_scheduled_work() dosen't take so long) and the latter guarantees any queued work item is never executed in parallel by multiple CPUs. These will be used by future patches to update workqueue users. Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 40 +++++++++++++++++++++++++++++----------- kernel/workqueue.c | 42 +++++++++++++++++++++++++----------------- 2 files changed, 54 insertions(+), 28 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 33e24e734d50..48b7422f25ae 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -232,12 +232,31 @@ enum { WQ_DFL_ACTIVE = WQ_MAX_ACTIVE / 2, }; +/* + * System-wide workqueues which are always present. + * + * system_wq is the one used by schedule[_delayed]_work[_on](). + * Multi-CPU multi-threaded. There are users which expect relatively + * short queue flush time. Don't queue works which can run for too + * long. + * + * system_long_wq is similar to system_wq but may host long running + * works. Queue flushing might take relatively long. + * + * system_nrt_wq is non-reentrant and guarantees that any given work + * item is never executed in parallel by multiple CPUs. 
Queue + * flushing might take relatively long. + */ +extern struct workqueue_struct *system_wq; +extern struct workqueue_struct *system_long_wq; +extern struct workqueue_struct *system_nrt_wq; + extern struct workqueue_struct * -__create_workqueue_key(const char *name, unsigned int flags, int max_active, - struct lock_class_key *key, const char *lock_name); +__alloc_workqueue_key(const char *name, unsigned int flags, int max_active, + struct lock_class_key *key, const char *lock_name); #ifdef CONFIG_LOCKDEP -#define __create_workqueue(name, flags, max_active) \ +#define alloc_workqueue(name, flags, max_active) \ ({ \ static struct lock_class_key __key; \ const char *__lock_name; \ @@ -247,21 +266,20 @@ __create_workqueue_key(const char *name, unsigned int flags, int max_active, else \ __lock_name = #name; \ \ - __create_workqueue_key((name), (flags), (max_active), \ - &__key, __lock_name); \ + __alloc_workqueue_key((name), (flags), (max_active), \ + &__key, __lock_name); \ }) #else -#define __create_workqueue(name, flags, max_active) \ - __create_workqueue_key((name), (flags), (max_active), NULL, NULL) +#define alloc_workqueue(name, flags, max_active) \ + __alloc_workqueue_key((name), (flags), (max_active), NULL, NULL) #endif #define create_workqueue(name) \ - __create_workqueue((name), WQ_RESCUER, 1) + alloc_workqueue((name), WQ_RESCUER, 1) #define create_freezeable_workqueue(name) \ - __create_workqueue((name), \ - WQ_FREEZEABLE | WQ_SINGLE_CPU | WQ_RESCUER, 1) + alloc_workqueue((name), WQ_FREEZEABLE | WQ_SINGLE_CPU | WQ_RESCUER, 1) #define create_singlethread_workqueue(name) \ - __create_workqueue((name), WQ_SINGLE_CPU | WQ_RESCUER, 1) + alloc_workqueue((name), WQ_SINGLE_CPU | WQ_RESCUER, 1) extern void destroy_workqueue(struct workqueue_struct *wq); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 4190e84cf995..16ce617974d2 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -210,6 +210,13 @@ struct workqueue_struct { #endif }; +struct workqueue_struct *system_wq __read_mostly; +struct workqueue_struct *system_long_wq __read_mostly; +struct workqueue_struct *system_nrt_wq __read_mostly; +EXPORT_SYMBOL_GPL(system_wq); +EXPORT_SYMBOL_GPL(system_long_wq); +EXPORT_SYMBOL_GPL(system_nrt_wq); + #define for_each_busy_worker(worker, i, pos, gcwq) \ for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++) \ hlist_for_each_entry(worker, pos, &gcwq->busy_hash[i], hentry) @@ -2306,8 +2313,6 @@ int cancel_delayed_work_sync(struct delayed_work *dwork) } EXPORT_SYMBOL(cancel_delayed_work_sync); -static struct workqueue_struct *keventd_wq __read_mostly; - /** * schedule_work - put work task in global workqueue * @work: job to be done @@ -2321,7 +2326,7 @@ static struct workqueue_struct *keventd_wq __read_mostly; */ int schedule_work(struct work_struct *work) { - return queue_work(keventd_wq, work); + return queue_work(system_wq, work); } EXPORT_SYMBOL(schedule_work); @@ -2334,7 +2339,7 @@ EXPORT_SYMBOL(schedule_work); */ int schedule_work_on(int cpu, struct work_struct *work) { - return queue_work_on(cpu, keventd_wq, work); + return queue_work_on(cpu, system_wq, work); } EXPORT_SYMBOL(schedule_work_on); @@ -2349,7 +2354,7 @@ EXPORT_SYMBOL(schedule_work_on); int schedule_delayed_work(struct delayed_work *dwork, unsigned long delay) { - return queue_delayed_work(keventd_wq, dwork, delay); + return queue_delayed_work(system_wq, dwork, delay); } EXPORT_SYMBOL(schedule_delayed_work); @@ -2382,7 +2387,7 @@ EXPORT_SYMBOL(flush_delayed_work); int schedule_delayed_work_on(int cpu, struct delayed_work 
*dwork, unsigned long delay) { - return queue_delayed_work_on(cpu, keventd_wq, dwork, delay); + return queue_delayed_work_on(cpu, system_wq, dwork, delay); } EXPORT_SYMBOL(schedule_delayed_work_on); @@ -2447,7 +2452,7 @@ int schedule_on_each_cpu(work_func_t func) */ void flush_scheduled_work(void) { - flush_workqueue(keventd_wq); + flush_workqueue(system_wq); } EXPORT_SYMBOL(flush_scheduled_work); @@ -2479,7 +2484,7 @@ EXPORT_SYMBOL_GPL(execute_in_process_context); int keventd_up(void) { - return keventd_wq != NULL; + return system_wq != NULL; } static struct cpu_workqueue_struct *alloc_cwqs(void) @@ -2539,15 +2544,16 @@ static int wq_clamp_max_active(int max_active, const char *name) return clamp_val(max_active, 1, WQ_MAX_ACTIVE); } -struct workqueue_struct *__create_workqueue_key(const char *name, - unsigned int flags, - int max_active, - struct lock_class_key *key, - const char *lock_name) +struct workqueue_struct *__alloc_workqueue_key(const char *name, + unsigned int flags, + int max_active, + struct lock_class_key *key, + const char *lock_name) { struct workqueue_struct *wq; unsigned int cpu; + max_active = max_active ?: WQ_DFL_ACTIVE; max_active = wq_clamp_max_active(max_active, name); wq = kzalloc(sizeof(*wq), GFP_KERNEL); @@ -2626,7 +2632,7 @@ err: } return NULL; } -EXPORT_SYMBOL_GPL(__create_workqueue_key); +EXPORT_SYMBOL_GPL(__alloc_workqueue_key); /** * destroy_workqueue - safely terminate a workqueue @@ -2910,7 +2916,7 @@ static int __cpuinit trustee_thread(void *__gcwq) continue; debug_work_activate(rebind_work); - insert_work(get_cwq(gcwq->cpu, keventd_wq), rebind_work, + insert_work(get_cwq(gcwq->cpu, system_wq), rebind_work, worker->scheduled.next, work_color_to_flags(WORK_NO_COLOR)); } @@ -3287,6 +3293,8 @@ void __init init_workqueues(void) spin_unlock_irq(&gcwq->lock); } - keventd_wq = __create_workqueue("events", 0, WQ_DFL_ACTIVE); - BUG_ON(!keventd_wq); + system_wq = alloc_workqueue("events", 0, 0); + system_long_wq = alloc_workqueue("events_long", 0, 0); + system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0); + BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq); } -- cgit v1.2.3 From dcd989cb73ab0f7b722d64ab6516f101d9f43f88 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 29 Jun 2010 10:07:14 +0200 Subject: workqueue: implement several utility APIs Implement the following utility APIs. workqueue_set_max_active() : adjust max_active of a wq workqueue_congested() : test whether a wq is contested work_cpu() : determine the last / current cpu of a work work_busy() : query whether a work is busy * Anton Blanchard fixed missing ret initialization in work_busy(). 
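A rough usage sketch for the new calls (illustrative only; the work item, queue pointer and numbers below are made up and not part of this patch):

    #include <linux/kernel.h>
    #include <linux/smp.h>
    #include <linux/workqueue.h>

    static void example_fn(struct work_struct *work)
    {
            pr_info("example work ran\n");
    }
    static DECLARE_WORK(example_work, example_fn);

    static void example_usage(struct workqueue_struct *wq)
    {
            queue_work(wq, &example_work);

            /* advisory only; the answer may be stale as soon as it's read */
            if (workqueue_congested(raw_smp_processor_id(), wq))
                    pr_info("cwq on this cpu has delayed works\n");

            /* widen or throttle the queue at runtime */
            workqueue_set_max_active(wq, 16);

            /* OR'd WORK_BUSY_PENDING / WORK_BUSY_RUNNING, 0 when idle */
            pr_info("busy=0x%x last_cpu=%u\n",
                    work_busy(&example_work), work_cpu(&example_work));
    }

workqueue_congested() and work_busy() are advisory by design: nothing holds gcwq->lock across the caller's use of the result, so they are only useful as hints or for debugging.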
Signed-off-by: Tejun Heo Cc: Anton Blanchard --- include/linux/workqueue.h | 11 ++++- kernel/workqueue.c | 108 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 117 insertions(+), 2 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 48b7422f25ae..0a7f79729380 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -61,6 +61,10 @@ enum { WORK_STRUCT_FLAG_MASK = (1UL << WORK_STRUCT_FLAG_BITS) - 1, WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK, WORK_STRUCT_NO_CPU = NR_CPUS << WORK_STRUCT_FLAG_BITS, + + /* bit mask for work_busy() return values */ + WORK_BUSY_PENDING = 1 << 0, + WORK_BUSY_RUNNING = 1 << 1, }; struct work_struct { @@ -307,9 +311,14 @@ extern void init_workqueues(void); int execute_in_process_context(work_func_t fn, struct execute_work *); extern int flush_work(struct work_struct *work); - extern int cancel_work_sync(struct work_struct *work); +extern void workqueue_set_max_active(struct workqueue_struct *wq, + int max_active); +extern bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq); +extern unsigned int work_cpu(struct work_struct *work); +extern unsigned int work_busy(struct work_struct *work); + /* * Kill off a pending schedule_delayed_work(). Note that the work callback * function may still be running on return from cancel_delayed_work(), unless diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 16ce617974d2..c1aa65c2ff38 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -203,7 +203,7 @@ struct workqueue_struct { cpumask_var_t mayday_mask; /* cpus requesting rescue */ struct worker *rescuer; /* I: rescue worker */ - int saved_max_active; /* I: saved cwq max_active */ + int saved_max_active; /* W: saved cwq max_active */ const char *name; /* I: workqueue name */ #ifdef CONFIG_LOCKDEP struct lockdep_map lockdep_map; @@ -2675,6 +2675,112 @@ void destroy_workqueue(struct workqueue_struct *wq) } EXPORT_SYMBOL_GPL(destroy_workqueue); +/** + * workqueue_set_max_active - adjust max_active of a workqueue + * @wq: target workqueue + * @max_active: new max_active value. + * + * Set max_active of @wq to @max_active. + * + * CONTEXT: + * Don't call from IRQ context. + */ +void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) +{ + unsigned int cpu; + + max_active = wq_clamp_max_active(max_active, wq->name); + + spin_lock(&workqueue_lock); + + wq->saved_max_active = max_active; + + for_each_possible_cpu(cpu) { + struct global_cwq *gcwq = get_gcwq(cpu); + + spin_lock_irq(&gcwq->lock); + + if (!(wq->flags & WQ_FREEZEABLE) || + !(gcwq->flags & GCWQ_FREEZING)) + get_cwq(gcwq->cpu, wq)->max_active = max_active; + + spin_unlock_irq(&gcwq->lock); + } + + spin_unlock(&workqueue_lock); +} +EXPORT_SYMBOL_GPL(workqueue_set_max_active); + +/** + * workqueue_congested - test whether a workqueue is congested + * @cpu: CPU in question + * @wq: target workqueue + * + * Test whether @wq's cpu workqueue for @cpu is congested. There is + * no synchronization around this function and the test result is + * unreliable and only useful as advisory hints or for debugging. + * + * RETURNS: + * %true if congested, %false otherwise. 
+ */ +bool workqueue_congested(unsigned int cpu, struct workqueue_struct *wq) +{ + struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); + + return !list_empty(&cwq->delayed_works); +} +EXPORT_SYMBOL_GPL(workqueue_congested); + +/** + * work_cpu - return the last known associated cpu for @work + * @work: the work of interest + * + * RETURNS: + * CPU number if @work was ever queued. NR_CPUS otherwise. + */ +unsigned int work_cpu(struct work_struct *work) +{ + struct global_cwq *gcwq = get_work_gcwq(work); + + return gcwq ? gcwq->cpu : NR_CPUS; +} +EXPORT_SYMBOL_GPL(work_cpu); + +/** + * work_busy - test whether a work is currently pending or running + * @work: the work to be tested + * + * Test whether @work is currently pending or running. There is no + * synchronization around this function and the test result is + * unreliable and only useful as advisory hints or for debugging. + * Especially for reentrant wqs, the pending state might hide the + * running state. + * + * RETURNS: + * OR'd bitmask of WORK_BUSY_* bits. + */ +unsigned int work_busy(struct work_struct *work) +{ + struct global_cwq *gcwq = get_work_gcwq(work); + unsigned long flags; + unsigned int ret = 0; + + if (!gcwq) + return false; + + spin_lock_irqsave(&gcwq->lock, flags); + + if (work_pending(work)) + ret |= WORK_BUSY_PENDING; + if (find_worker_executing_work(gcwq, work)) + ret |= WORK_BUSY_RUNNING; + + spin_unlock_irqrestore(&gcwq->lock, flags); + + return ret; +} +EXPORT_SYMBOL_GPL(work_busy); + /* * CPU hotplug. * -- cgit v1.2.3 From 649027d73a6309ac34dc2886362e662bd73456dc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 29 Jun 2010 10:07:14 +0200 Subject: workqueue: implement high priority workqueue This patch implements high priority workqueue which can be specified with WQ_HIGHPRI flag on creation. A high priority workqueue has the following properties. * A work queued to it is queued at the head of the worklist of the respective gcwq after other highpri works, while normal works are always appended at the end. * As long as there are highpri works on gcwq->worklist, [__]need_more_worker() remains %true and process_one_work() wakes up another worker before it start executing a work. The above two properties guarantee that works queued to high priority workqueues are dispatched to workers and start execution as soon as possible regardless of the state of other works. Signed-off-by: Tejun Heo Cc: Andi Kleen Cc: Andrew Morton --- include/linux/workqueue.h | 1 + kernel/workqueue.c | 70 +++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 65 insertions(+), 6 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 0a7f79729380..006dcf7e808a 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -231,6 +231,7 @@ enum { WQ_SINGLE_CPU = 1 << 1, /* only single cpu at a time */ WQ_NON_REENTRANT = 1 << 2, /* guarantee non-reentrance */ WQ_RESCUER = 1 << 3, /* has an rescue worker */ + WQ_HIGHPRI = 1 << 4, /* high priority */ WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? 
*/ WQ_DFL_ACTIVE = WQ_MAX_ACTIVE / 2, diff --git a/kernel/workqueue.c b/kernel/workqueue.c index c1aa65c2ff38..5775717288d5 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -43,6 +43,7 @@ enum { GCWQ_MANAGING_WORKERS = 1 << 1, /* managing workers */ GCWQ_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */ GCWQ_FREEZING = 1 << 3, /* freeze in progress */ + GCWQ_HIGHPRI_PENDING = 1 << 4, /* highpri works on queue */ /* worker flags */ WORKER_STARTED = 1 << 0, /* started */ @@ -452,15 +453,19 @@ static struct global_cwq *get_work_gcwq(struct work_struct *work) * assume that they're being called with gcwq->lock held. */ +static bool __need_more_worker(struct global_cwq *gcwq) +{ + return !atomic_read(get_gcwq_nr_running(gcwq->cpu)) || + gcwq->flags & GCWQ_HIGHPRI_PENDING; +} + /* * Need to wake up a worker? Called from anything but currently * running workers. */ static bool need_more_worker(struct global_cwq *gcwq) { - atomic_t *nr_running = get_gcwq_nr_running(gcwq->cpu); - - return !list_empty(&gcwq->worklist) && !atomic_read(nr_running); + return !list_empty(&gcwq->worklist) && __need_more_worker(gcwq); } /* Can I start working? Called from busy but !running workers. */ @@ -733,6 +738,43 @@ static struct worker *find_worker_executing_work(struct global_cwq *gcwq, work); } +/** + * gcwq_determine_ins_pos - find insertion position + * @gcwq: gcwq of interest + * @cwq: cwq a work is being queued for + * + * A work for @cwq is about to be queued on @gcwq, determine insertion + * position for the work. If @cwq is for HIGHPRI wq, the work is + * queued at the head of the queue but in FIFO order with respect to + * other HIGHPRI works; otherwise, at the end of the queue. This + * function also sets GCWQ_HIGHPRI_PENDING flag to hint @gcwq that + * there are HIGHPRI works pending. + * + * CONTEXT: + * spin_lock_irq(gcwq->lock). + * + * RETURNS: + * Pointer to inserstion position. 
+ */ +static inline struct list_head *gcwq_determine_ins_pos(struct global_cwq *gcwq, + struct cpu_workqueue_struct *cwq) +{ + struct work_struct *twork; + + if (likely(!(cwq->wq->flags & WQ_HIGHPRI))) + return &gcwq->worklist; + + list_for_each_entry(twork, &gcwq->worklist, entry) { + struct cpu_workqueue_struct *tcwq = get_work_cwq(twork); + + if (!(tcwq->wq->flags & WQ_HIGHPRI)) + break; + } + + gcwq->flags |= GCWQ_HIGHPRI_PENDING; + return &twork->entry; +} + /** * insert_work - insert a work into gcwq * @cwq: cwq @work belongs to @@ -770,7 +812,7 @@ static void insert_work(struct cpu_workqueue_struct *cwq, */ smp_mb(); - if (!atomic_read(get_gcwq_nr_running(gcwq->cpu))) + if (__need_more_worker(gcwq)) wake_up_worker(gcwq); } @@ -887,7 +929,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, if (likely(cwq->nr_active < cwq->max_active)) { cwq->nr_active++; - worklist = &gcwq->worklist; + worklist = gcwq_determine_ins_pos(gcwq, cwq); } else worklist = &cwq->delayed_works; @@ -1526,8 +1568,9 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) { struct work_struct *work = list_first_entry(&cwq->delayed_works, struct work_struct, entry); + struct list_head *pos = gcwq_determine_ins_pos(cwq->gcwq, cwq); - move_linked_works(work, &cwq->gcwq->worklist, NULL); + move_linked_works(work, pos, NULL); cwq->nr_active++; } @@ -1634,6 +1677,21 @@ static void process_one_work(struct worker *worker, struct work_struct *work) set_work_cpu(work, gcwq->cpu); list_del_init(&work->entry); + /* + * If HIGHPRI_PENDING, check the next work, and, if HIGHPRI, + * wake up another worker; otherwise, clear HIGHPRI_PENDING. + */ + if (unlikely(gcwq->flags & GCWQ_HIGHPRI_PENDING)) { + struct work_struct *nwork = list_first_entry(&gcwq->worklist, + struct work_struct, entry); + + if (!list_empty(&gcwq->worklist) && + get_work_cwq(nwork)->wq->flags & WQ_HIGHPRI) + wake_up_worker(gcwq); + else + gcwq->flags &= ~GCWQ_HIGHPRI_PENDING; + } + spin_unlock_irq(&gcwq->lock); work_clear_pending(work); -- cgit v1.2.3 From fb0e7beb5c1b6fb4da786ba709d7138373d5fb22 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 29 Jun 2010 10:07:15 +0200 Subject: workqueue: implement cpu intensive workqueue This patch implements cpu intensive workqueue which can be specified with WQ_CPU_INTENSIVE flag on creation. Works queued to a cpu intensive workqueue don't participate in concurrency management. IOW, it doesn't contribute to gcwq->nr_running and thus doesn't delay excution of other works. Note that although cpu intensive works won't delay other works, they can be delayed by other works. Combine with WQ_HIGHPRI to avoid being delayed by other works too. As the name suggests this is useful when using workqueue for cpu intensive works. Workers executing cpu intensive works are not considered for workqueue concurrency management and left for the scheduler to manage. 
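For illustration only (the queue name and init function are invented, not part of this patch), a user with long CPU-bound works might now do:

    #include <linux/errno.h>
    #include <linux/init.h>
    #include <linux/workqueue.h>

    static struct workqueue_struct *crunch_wq;

    static int __init crunch_init(void)
    {
            /*
             * WQ_CPU_INTENSIVE keeps these works out of concurrency
             * management; adding WQ_HIGHPRI, as noted above, also keeps
             * them from being delayed behind other works.
             */
            crunch_wq = alloc_workqueue("crunch",
                                        WQ_HIGHPRI | WQ_CPU_INTENSIVE, 0);
            if (!crunch_wq)
                    return -ENOMEM;
            return 0;
    }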
Signed-off-by: Tejun Heo Cc: Andrew Morton --- include/linux/workqueue.h | 1 + kernel/workqueue.c | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 006dcf7e808a..3f36d37ac5ba 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -232,6 +232,7 @@ enum { WQ_NON_REENTRANT = 1 << 2, /* guarantee non-reentrance */ WQ_RESCUER = 1 << 3, /* has an rescue worker */ WQ_HIGHPRI = 1 << 4, /* high priority */ + WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */ WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ WQ_DFL_ACTIVE = WQ_MAX_ACTIVE / 2, diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 5775717288d5..6fa847c5c5e9 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -52,8 +52,10 @@ enum { WORKER_PREP = 1 << 3, /* preparing to run works */ WORKER_ROGUE = 1 << 4, /* not bound to any cpu */ WORKER_REBIND = 1 << 5, /* mom is home, come back */ + WORKER_CPU_INTENSIVE = 1 << 6, /* cpu intensive */ - WORKER_NOT_RUNNING = WORKER_PREP | WORKER_ROGUE | WORKER_REBIND, + WORKER_NOT_RUNNING = WORKER_PREP | WORKER_ROGUE | WORKER_REBIND | + WORKER_CPU_INTENSIVE, /* gcwq->trustee_state */ TRUSTEE_START = 0, /* start */ @@ -1641,6 +1643,7 @@ static void process_one_work(struct worker *worker, struct work_struct *work) struct cpu_workqueue_struct *cwq = get_work_cwq(work); struct global_cwq *gcwq = cwq->gcwq; struct hlist_head *bwh = busy_worker_head(gcwq, work); + bool cpu_intensive = cwq->wq->flags & WQ_CPU_INTENSIVE; work_func_t f = work->func; int work_color; struct worker *collision; @@ -1692,6 +1695,13 @@ static void process_one_work(struct worker *worker, struct work_struct *work) gcwq->flags &= ~GCWQ_HIGHPRI_PENDING; } + /* + * CPU intensive works don't participate in concurrency + * management. They're the scheduler's responsibility. + */ + if (unlikely(cpu_intensive)) + worker_set_flags(worker, WORKER_CPU_INTENSIVE, true); + spin_unlock_irq(&gcwq->lock); work_clear_pending(work); @@ -1713,6 +1723,10 @@ static void process_one_work(struct worker *worker, struct work_struct *work) spin_lock_irq(&gcwq->lock); + /* clear cpu intensive status */ + if (unlikely(cpu_intensive)) + worker_clr_flags(worker, WORKER_CPU_INTENSIVE); + /* we're done with it, release */ hlist_del_init(&worker->hentry); worker->current_work = NULL; -- cgit v1.2.3 From bdbc5dd7de5d07d6c9d3536e598956165a031d4c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 2 Jul 2010 10:03:51 +0200 Subject: workqueue: prepare for WQ_UNBOUND implementation In preparation of WQ_UNBOUND addition, make the following changes. * Add WORK_CPU_* constants for pseudo cpu id numbers used (currently only WORK_CPU_NONE) and use them instead of NR_CPUS. This is to allow another pseudo cpu id for unbound cpu. * Reorder WQ_* flags. * Make workqueue_struct->cpu_wq a union which contains a percpu pointer, regular pointer and an unsigned long value and use kzalloc/kfree() in UP allocation path. This will be used to implement unbound workqueues which will use only one cwq on SMPs. * Move alloc_cwqs() allocation after initialization of wq fields, so that alloc_cwqs() has access to wq->flags. * Trivial relocation of wq local variables in freeze functions. These changes don't cause any functional change. 
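The UP path replaces the percpu allocation with a plain kzalloc() that is aligned by hand. A standalone sketch of that over-allocate / align / stash-the-original-pointer idea (the helper names here are invented for the example; the real code lives in alloc_cwqs()/free_cwqs()):

    #include <linux/kernel.h>
    #include <linux/slab.h>

    static void *alloc_aligned(size_t size, size_t align)
    {
            /* over-allocate so the object can be aligned by hand */
            void *ptr = kzalloc(size + align + sizeof(void *), GFP_KERNEL);
            void *obj;

            if (!ptr)
                    return NULL;
            obj = PTR_ALIGN(ptr, align);
            /* stash the original pointer right after the object */
            *(void **)(obj + size) = ptr;
            return obj;
    }

    static void free_aligned(void *obj, size_t size)
    {
            if (obj)
                    kfree(*(void **)(obj + size));
    }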
Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 10 ++++-- kernel/workqueue.c | 83 ++++++++++++++++++++++++----------------------- 2 files changed, 50 insertions(+), 43 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 3f36d37ac5ba..139069a6286c 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -50,6 +50,10 @@ enum { WORK_NR_COLORS = (1 << WORK_STRUCT_COLOR_BITS) - 1, WORK_NO_COLOR = WORK_NR_COLORS, + /* special cpu IDs */ + WORK_CPU_NONE = NR_CPUS, + WORK_CPU_LAST = WORK_CPU_NONE, + /* * Reserve 6 bits off of cwq pointer w/ debugobjects turned * off. This makes cwqs aligned to 64 bytes which isn't too @@ -60,7 +64,7 @@ enum { WORK_STRUCT_FLAG_MASK = (1UL << WORK_STRUCT_FLAG_BITS) - 1, WORK_STRUCT_WQ_DATA_MASK = ~WORK_STRUCT_FLAG_MASK, - WORK_STRUCT_NO_CPU = NR_CPUS << WORK_STRUCT_FLAG_BITS, + WORK_STRUCT_NO_CPU = WORK_CPU_NONE << WORK_STRUCT_FLAG_BITS, /* bit mask for work_busy() return values */ WORK_BUSY_PENDING = 1 << 0, @@ -227,9 +231,9 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } clear_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)) enum { - WQ_FREEZEABLE = 1 << 0, /* freeze during suspend */ + WQ_NON_REENTRANT = 1 << 0, /* guarantee non-reentrance */ WQ_SINGLE_CPU = 1 << 1, /* only single cpu at a time */ - WQ_NON_REENTRANT = 1 << 2, /* guarantee non-reentrance */ + WQ_FREEZEABLE = 1 << 2, /* freeze during suspend */ WQ_RESCUER = 1 << 3, /* has an rescue worker */ WQ_HIGHPRI = 1 << 4, /* high priority */ WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */ diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 2eb9fbddf5c6..a105ddf55f79 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -190,7 +190,11 @@ struct wq_flusher { */ struct workqueue_struct { unsigned int flags; /* I: WQ_* flags */ - struct cpu_workqueue_struct *cpu_wq; /* I: cwq's */ + union { + struct cpu_workqueue_struct __percpu *pcpu; + struct cpu_workqueue_struct *single; + unsigned long v; + } cpu_wq; /* I: cwq's */ struct list_head list; /* W: list of all workqueues */ struct mutex flush_mutex; /* protects wq flushing */ @@ -362,7 +366,11 @@ static atomic_t *get_gcwq_nr_running(unsigned int cpu) static struct cpu_workqueue_struct *get_cwq(unsigned int cpu, struct workqueue_struct *wq) { - return per_cpu_ptr(wq->cpu_wq, cpu); +#ifndef CONFIG_SMP + return wq->cpu_wq.single; +#else + return per_cpu_ptr(wq->cpu_wq.pcpu, cpu); +#endif } static unsigned int work_color_to_flags(int color) @@ -442,7 +450,7 @@ static struct global_cwq *get_work_gcwq(struct work_struct *work) return ((struct cpu_workqueue_struct *)data)->gcwq; cpu = data >> WORK_STRUCT_FLAG_BITS; - if (cpu == NR_CPUS) + if (cpu == WORK_CPU_NONE) return NULL; BUG_ON(cpu >= nr_cpu_ids); @@ -846,7 +854,7 @@ static void cwq_unbind_single_cpu(struct cpu_workqueue_struct *cwq) */ if (likely(!(gcwq->flags & GCWQ_FREEZING))) { smp_wmb(); /* paired with cmpxchg() in __queue_work() */ - wq->single_cpu = NR_CPUS; + wq->single_cpu = WORK_CPU_NONE; } } @@ -904,7 +912,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, */ retry: cpu = wq->single_cpu; - arbitrate = cpu == NR_CPUS; + arbitrate = cpu == WORK_CPU_NONE; if (arbitrate) cpu = req_cpu; @@ -918,7 +926,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, * visible on the new cpu after this point. 
*/ if (arbitrate) - cmpxchg(&wq->single_cpu, NR_CPUS, cpu); + cmpxchg(&wq->single_cpu, WORK_CPU_NONE, cpu); if (unlikely(wq->single_cpu != cpu)) { spin_unlock_irqrestore(&gcwq->lock, flags); @@ -2572,7 +2580,7 @@ int keventd_up(void) return system_wq != NULL; } -static struct cpu_workqueue_struct *alloc_cwqs(void) +static int alloc_cwqs(struct workqueue_struct *wq) { /* * cwqs are forced aligned according to WORK_STRUCT_FLAG_BITS. @@ -2582,40 +2590,36 @@ static struct cpu_workqueue_struct *alloc_cwqs(void) const size_t size = sizeof(struct cpu_workqueue_struct); const size_t align = max_t(size_t, 1 << WORK_STRUCT_FLAG_BITS, __alignof__(unsigned long long)); - struct cpu_workqueue_struct *cwqs; #ifndef CONFIG_SMP void *ptr; /* - * On UP, percpu allocator doesn't honor alignment parameter - * and simply uses arch-dependent default. Allocate enough - * room to align cwq and put an extra pointer at the end - * pointing back to the originally allocated pointer which - * will be used for free. - * - * FIXME: This really belongs to UP percpu code. Update UP - * percpu code to honor alignment and remove this ugliness. + * Allocate enough room to align cwq and put an extra pointer + * at the end pointing back to the originally allocated + * pointer which will be used for free. */ - ptr = __alloc_percpu(size + align + sizeof(void *), 1); - cwqs = PTR_ALIGN(ptr, align); - *(void **)per_cpu_ptr(cwqs + 1, 0) = ptr; + ptr = kzalloc(size + align + sizeof(void *), GFP_KERNEL); + if (ptr) { + wq->cpu_wq.single = PTR_ALIGN(ptr, align); + *(void **)(wq->cpu_wq.single + 1) = ptr; + } #else - /* On SMP, percpu allocator can do it itself */ - cwqs = __alloc_percpu(size, align); + /* On SMP, percpu allocator can align itself */ + wq->cpu_wq.pcpu = __alloc_percpu(size, align); #endif /* just in case, make sure it's actually aligned */ - BUG_ON(!IS_ALIGNED((unsigned long)cwqs, align)); - return cwqs; + BUG_ON(!IS_ALIGNED(wq->cpu_wq.v, align)); + return wq->cpu_wq.v ? 
0 : -ENOMEM; } -static void free_cwqs(struct cpu_workqueue_struct *cwqs) +static void free_cwqs(struct workqueue_struct *wq) { #ifndef CONFIG_SMP /* on UP, the pointer to free is stored right after the cwq */ - if (cwqs) - free_percpu(*(void **)per_cpu_ptr(cwqs + 1, 0)); + if (wq->cpu_wq.single) + kfree(*(void **)(wq->cpu_wq.single + 1)); #else - free_percpu(cwqs); + free_percpu(wq->cpu_wq.pcpu); #endif } @@ -2645,22 +2649,21 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name, if (!wq) goto err; - wq->cpu_wq = alloc_cwqs(); - if (!wq->cpu_wq) - goto err; - wq->flags = flags; wq->saved_max_active = max_active; mutex_init(&wq->flush_mutex); atomic_set(&wq->nr_cwqs_to_flush, 0); INIT_LIST_HEAD(&wq->flusher_queue); INIT_LIST_HEAD(&wq->flusher_overflow); - wq->single_cpu = NR_CPUS; + wq->single_cpu = WORK_CPU_NONE; wq->name = name; lockdep_init_map(&wq->lockdep_map, lock_name, key, 0); INIT_LIST_HEAD(&wq->list); + if (alloc_cwqs(wq) < 0) + goto err; + for_each_possible_cpu(cpu) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); struct global_cwq *gcwq = get_gcwq(cpu); @@ -2710,7 +2713,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name, return wq; err: if (wq) { - free_cwqs(wq->cpu_wq); + free_cwqs(wq); free_cpumask_var(wq->mayday_mask); kfree(wq->rescuer); kfree(wq); @@ -2755,7 +2758,7 @@ void destroy_workqueue(struct workqueue_struct *wq) free_cpumask_var(wq->mayday_mask); } - free_cwqs(wq->cpu_wq); + free_cwqs(wq); kfree(wq); } EXPORT_SYMBOL_GPL(destroy_workqueue); @@ -2821,13 +2824,13 @@ EXPORT_SYMBOL_GPL(workqueue_congested); * @work: the work of interest * * RETURNS: - * CPU number if @work was ever queued. NR_CPUS otherwise. + * CPU number if @work was ever queued. WORK_CPU_NONE otherwise. */ unsigned int work_cpu(struct work_struct *work) { struct global_cwq *gcwq = get_work_gcwq(work); - return gcwq ? gcwq->cpu : NR_CPUS; + return gcwq ? gcwq->cpu : WORK_CPU_NONE; } EXPORT_SYMBOL_GPL(work_cpu); @@ -3300,7 +3303,6 @@ EXPORT_SYMBOL_GPL(work_on_cpu); */ void freeze_workqueues_begin(void) { - struct workqueue_struct *wq; unsigned int cpu; spin_lock(&workqueue_lock); @@ -3310,6 +3312,7 @@ void freeze_workqueues_begin(void) for_each_possible_cpu(cpu) { struct global_cwq *gcwq = get_gcwq(cpu); + struct workqueue_struct *wq; spin_lock_irq(&gcwq->lock); @@ -3344,7 +3347,6 @@ void freeze_workqueues_begin(void) */ bool freeze_workqueues_busy(void) { - struct workqueue_struct *wq; unsigned int cpu; bool busy = false; @@ -3353,6 +3355,7 @@ bool freeze_workqueues_busy(void) BUG_ON(!workqueue_freezing); for_each_possible_cpu(cpu) { + struct workqueue_struct *wq; /* * nr_active is monotonically decreasing. It's safe * to peek without lock. @@ -3386,7 +3389,6 @@ out_unlock: */ void thaw_workqueues(void) { - struct workqueue_struct *wq; unsigned int cpu; spin_lock(&workqueue_lock); @@ -3396,6 +3398,7 @@ void thaw_workqueues(void) for_each_possible_cpu(cpu) { struct global_cwq *gcwq = get_gcwq(cpu); + struct workqueue_struct *wq; spin_lock_irq(&gcwq->lock); @@ -3443,7 +3446,7 @@ void __init init_workqueues(void) * sure cpu number won't overflow into kernel pointer area so * that they can be distinguished. 
*/ - BUILD_BUG_ON(NR_CPUS << WORK_STRUCT_FLAG_BITS >= PAGE_OFFSET); + BUILD_BUG_ON(WORK_CPU_LAST << WORK_STRUCT_FLAG_BITS >= PAGE_OFFSET); hotcpu_notifier(workqueue_cpu_callback, CPU_PRI_WORKQUEUE); -- cgit v1.2.3 From f34217977d717385a3e9fd7018ac39fade3964c0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 2 Jul 2010 10:03:51 +0200 Subject: workqueue: implement unbound workqueue This patch implements unbound workqueue which can be specified with WQ_UNBOUND flag on creation. An unbound workqueue has the following properties. * It uses a dedicated gcwq with a pseudo CPU number WORK_CPU_UNBOUND. This gcwq is always online and disassociated. * Workers are not bound to any CPU and not concurrency managed. Works are dispatched to workers as soon as possible and the only applied limitation is @max_active. IOW, all unbound workqeueues are implicitly high priority. Unbound workqueues can be used as simple execution context provider. Contexts unbound to any cpu are served as soon as possible. Signed-off-by: Tejun Heo Cc: Arjan van de Ven Cc: David Howells --- include/linux/workqueue.h | 15 +++- kernel/workqueue.c | 218 +++++++++++++++++++++++++++++++++------------- 2 files changed, 173 insertions(+), 60 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 139069a6286c..67ce734747f6 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -51,7 +51,8 @@ enum { WORK_NO_COLOR = WORK_NR_COLORS, /* special cpu IDs */ - WORK_CPU_NONE = NR_CPUS, + WORK_CPU_UNBOUND = NR_CPUS, + WORK_CPU_NONE = NR_CPUS + 1, WORK_CPU_LAST = WORK_CPU_NONE, /* @@ -237,11 +238,17 @@ enum { WQ_RESCUER = 1 << 3, /* has an rescue worker */ WQ_HIGHPRI = 1 << 4, /* high priority */ WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */ + WQ_UNBOUND = 1 << 6, /* not bound to any cpu */ WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ + WQ_MAX_UNBOUND_PER_CPU = 4, /* 4 * #cpus for unbound wq */ WQ_DFL_ACTIVE = WQ_MAX_ACTIVE / 2, }; +/* unbound wq's aren't per-cpu, scale max_active according to #cpus */ +#define WQ_UNBOUND_MAX_ACTIVE \ + max_t(int, WQ_MAX_ACTIVE, num_possible_cpus() * WQ_MAX_UNBOUND_PER_CPU) + /* * System-wide workqueues which are always present. * @@ -256,10 +263,16 @@ enum { * system_nrt_wq is non-reentrant and guarantees that any given work * item is never executed in parallel by multiple CPUs. Queue * flushing might take relatively long. + * + * system_unbound_wq is unbound workqueue. Workers are not bound to + * any specific CPU, not concurrency managed, and all queued works are + * executed immediately as long as max_active limit is not reached and + * resources are available. 
*/ extern struct workqueue_struct *system_wq; extern struct workqueue_struct *system_long_wq; extern struct workqueue_struct *system_nrt_wq; +extern struct workqueue_struct *system_unbound_wq; extern struct workqueue_struct * __alloc_workqueue_key(const char *name, unsigned int flags, int max_active, diff --git a/kernel/workqueue.c b/kernel/workqueue.c index a105ddf55f79..4608563cdd63 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -53,9 +53,10 @@ enum { WORKER_ROGUE = 1 << 4, /* not bound to any cpu */ WORKER_REBIND = 1 << 5, /* mom is home, come back */ WORKER_CPU_INTENSIVE = 1 << 6, /* cpu intensive */ + WORKER_UNBOUND = 1 << 7, /* worker is unbound */ WORKER_NOT_RUNNING = WORKER_PREP | WORKER_ROGUE | WORKER_REBIND | - WORKER_CPU_INTENSIVE, + WORKER_CPU_INTENSIVE | WORKER_UNBOUND, /* gcwq->trustee_state */ TRUSTEE_START = 0, /* start */ @@ -96,7 +97,7 @@ enum { * X: During normal operation, modification requires gcwq->lock and * should be done only from local cpu. Either disabling preemption * on local cpu or grabbing gcwq->lock is enough for read access. - * While trustee is in charge, it's identical to L. + * If GCWQ_DISASSOCIATED is set, it's identical to L. * * F: wq->flush_mutex protected. * @@ -220,14 +221,52 @@ struct workqueue_struct { struct workqueue_struct *system_wq __read_mostly; struct workqueue_struct *system_long_wq __read_mostly; struct workqueue_struct *system_nrt_wq __read_mostly; +struct workqueue_struct *system_unbound_wq __read_mostly; EXPORT_SYMBOL_GPL(system_wq); EXPORT_SYMBOL_GPL(system_long_wq); EXPORT_SYMBOL_GPL(system_nrt_wq); +EXPORT_SYMBOL_GPL(system_unbound_wq); #define for_each_busy_worker(worker, i, pos, gcwq) \ for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++) \ hlist_for_each_entry(worker, pos, &gcwq->busy_hash[i], hentry) +static inline int __next_gcwq_cpu(int cpu, const struct cpumask *mask, + unsigned int sw) +{ + if (cpu < nr_cpu_ids) { + if (sw & 1) { + cpu = cpumask_next(cpu, mask); + if (cpu < nr_cpu_ids) + return cpu; + } + if (sw & 2) + return WORK_CPU_UNBOUND; + } + return WORK_CPU_NONE; +} + +static inline int __next_wq_cpu(int cpu, const struct cpumask *mask, + struct workqueue_struct *wq) +{ + return __next_gcwq_cpu(cpu, mask, !(wq->flags & WQ_UNBOUND) ? 1 : 2); +} + +#define for_each_gcwq_cpu(cpu) \ + for ((cpu) = __next_gcwq_cpu(-1, cpu_possible_mask, 3); \ + (cpu) < WORK_CPU_NONE; \ + (cpu) = __next_gcwq_cpu((cpu), cpu_possible_mask, 3)) + +#define for_each_online_gcwq_cpu(cpu) \ + for ((cpu) = __next_gcwq_cpu(-1, cpu_online_mask, 3); \ + (cpu) < WORK_CPU_NONE; \ + (cpu) = __next_gcwq_cpu((cpu), cpu_online_mask, 3)) + +#define for_each_cwq_cpu(cpu, wq) \ + for ((cpu) = __next_wq_cpu(-1, cpu_possible_mask, (wq)); \ + (cpu) < WORK_CPU_NONE; \ + (cpu) = __next_wq_cpu((cpu), cpu_possible_mask, (wq))) + #ifdef CONFIG_DEBUG_OBJECTS_WORK static struct debug_obj_descr work_debug_descr; @@ -351,26 +390,46 @@ static bool workqueue_freezing; /* W: have wqs started freezing? */ static DEFINE_PER_CPU(struct global_cwq, global_cwq); static DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, gcwq_nr_running); +/* + * Global cpu workqueue and nr_running counter for unbound gcwq. The + * gcwq is always online, has GCWQ_DISASSOCIATED set, and all its + * workers have WORKER_UNBOUND set. 
+ */ +static struct global_cwq unbound_global_cwq; +static atomic_t unbound_gcwq_nr_running = ATOMIC_INIT(0); /* always 0 */ + static int worker_thread(void *__worker); static struct global_cwq *get_gcwq(unsigned int cpu) { - return &per_cpu(global_cwq, cpu); + if (cpu != WORK_CPU_UNBOUND) + return &per_cpu(global_cwq, cpu); + else + return &unbound_global_cwq; } static atomic_t *get_gcwq_nr_running(unsigned int cpu) { - return &per_cpu(gcwq_nr_running, cpu); + if (cpu != WORK_CPU_UNBOUND) + return &per_cpu(gcwq_nr_running, cpu); + else + return &unbound_gcwq_nr_running; } static struct cpu_workqueue_struct *get_cwq(unsigned int cpu, struct workqueue_struct *wq) { -#ifndef CONFIG_SMP - return wq->cpu_wq.single; + if (!(wq->flags & WQ_UNBOUND)) { + if (likely(cpu < nr_cpu_ids)) { +#ifdef CONFIG_SMP + return per_cpu_ptr(wq->cpu_wq.pcpu, cpu); #else - return per_cpu_ptr(wq->cpu_wq.pcpu, cpu); + return wq->cpu_wq.single; #endif + } + } else if (likely(cpu == WORK_CPU_UNBOUND)) + return wq->cpu_wq.single; + return NULL; } static unsigned int work_color_to_flags(int color) @@ -453,7 +512,7 @@ static struct global_cwq *get_work_gcwq(struct work_struct *work) if (cpu == WORK_CPU_NONE) return NULL; - BUG_ON(cpu >= nr_cpu_ids); + BUG_ON(cpu >= nr_cpu_ids && cpu != WORK_CPU_UNBOUND); return get_gcwq(cpu); } @@ -869,11 +928,14 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, debug_work_activate(work); + if (unlikely(cpu == WORK_CPU_UNBOUND)) + cpu = raw_smp_processor_id(); + /* * Determine gcwq to use. SINGLE_CPU is inherently * NON_REENTRANT, so test it first. */ - if (!(wq->flags & WQ_SINGLE_CPU)) { + if (!(wq->flags & (WQ_SINGLE_CPU | WQ_UNBOUND))) { struct global_cwq *last_gcwq; /* @@ -900,7 +962,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, } } else spin_lock_irqsave(&gcwq->lock, flags); - } else { + } else if (!(wq->flags & WQ_UNBOUND)) { unsigned int req_cpu = cpu; /* @@ -932,6 +994,9 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, spin_unlock_irqrestore(&gcwq->lock, flags); goto retry; } + } else { + gcwq = get_gcwq(WORK_CPU_UNBOUND); + spin_lock_irqsave(&gcwq->lock, flags); } /* gcwq determined, get cwq and queue */ @@ -1166,7 +1231,8 @@ static bool worker_maybe_bind_and_lock(struct worker *worker) * it races with cpu hotunplug operation. Verify * against GCWQ_DISASSOCIATED. */ - set_cpus_allowed_ptr(task, get_cpu_mask(gcwq->cpu)); + if (!(gcwq->flags & GCWQ_DISASSOCIATED)) + set_cpus_allowed_ptr(task, get_cpu_mask(gcwq->cpu)); spin_lock_irq(&gcwq->lock); if (gcwq->flags & GCWQ_DISASSOCIATED) @@ -1231,8 +1297,9 @@ static struct worker *alloc_worker(void) */ static struct worker *create_worker(struct global_cwq *gcwq, bool bind) { - int id = -1; + bool on_unbound_cpu = gcwq->cpu == WORK_CPU_UNBOUND; struct worker *worker = NULL; + int id = -1; spin_lock_irq(&gcwq->lock); while (ida_get_new(&gcwq->worker_ida, &id)) { @@ -1250,8 +1317,12 @@ static struct worker *create_worker(struct global_cwq *gcwq, bool bind) worker->gcwq = gcwq; worker->id = id; - worker->task = kthread_create(worker_thread, worker, "kworker/%u:%d", - gcwq->cpu, id); + if (!on_unbound_cpu) + worker->task = kthread_create(worker_thread, worker, + "kworker/%u:%d", gcwq->cpu, id); + else + worker->task = kthread_create(worker_thread, worker, + "kworker/u:%d", id); if (IS_ERR(worker->task)) goto fail; @@ -1260,10 +1331,13 @@ static struct worker *create_worker(struct global_cwq *gcwq, bool bind) * online later on. 
Make sure every worker has * PF_THREAD_BOUND set. */ - if (bind) + if (bind && !on_unbound_cpu) kthread_bind(worker->task, gcwq->cpu); - else + else { worker->task->flags |= PF_THREAD_BOUND; + if (on_unbound_cpu) + worker->flags |= WORKER_UNBOUND; + } return worker; fail: @@ -1358,12 +1432,17 @@ static bool send_mayday(struct work_struct *work) { struct cpu_workqueue_struct *cwq = get_work_cwq(work); struct workqueue_struct *wq = cwq->wq; + unsigned int cpu; if (!(wq->flags & WQ_RESCUER)) return false; /* mayday mayday mayday */ - if (!cpumask_test_and_set_cpu(cwq->gcwq->cpu, wq->mayday_mask)) + cpu = cwq->gcwq->cpu; + /* WORK_CPU_UNBOUND can't be set in cpumask, use cpu 0 instead */ + if (cpu == WORK_CPU_UNBOUND) + cpu = 0; + if (!cpumask_test_and_set_cpu(cpu, wq->mayday_mask)) wake_up_process(wq->rescuer->task); return true; } @@ -1882,6 +1961,7 @@ static int rescuer_thread(void *__wq) struct workqueue_struct *wq = __wq; struct worker *rescuer = wq->rescuer; struct list_head *scheduled = &rescuer->scheduled; + bool is_unbound = wq->flags & WQ_UNBOUND; unsigned int cpu; set_user_nice(current, RESCUER_NICE_LEVEL); @@ -1891,8 +1971,13 @@ repeat: if (kthread_should_stop()) return 0; + /* + * See whether any cpu is asking for help. Unbounded + * workqueues use cpu 0 in mayday_mask for CPU_UNBOUND. + */ for_each_cpu(cpu, wq->mayday_mask) { - struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); + unsigned int tcpu = is_unbound ? WORK_CPU_UNBOUND : cpu; + struct cpu_workqueue_struct *cwq = get_cwq(tcpu, wq); struct global_cwq *gcwq = cwq->gcwq; struct work_struct *work, *n; @@ -2034,7 +2119,7 @@ static bool flush_workqueue_prep_cwqs(struct workqueue_struct *wq, atomic_set(&wq->nr_cwqs_to_flush, 1); } - for_each_possible_cpu(cpu) { + for_each_cwq_cpu(cpu, wq) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); struct global_cwq *gcwq = cwq->gcwq; @@ -2344,7 +2429,7 @@ static void wait_on_work(struct work_struct *work) lock_map_acquire(&work->lockdep_map); lock_map_release(&work->lockdep_map); - for_each_possible_cpu(cpu) + for_each_gcwq_cpu(cpu) wait_on_cpu_work(get_gcwq(cpu), work); } @@ -2590,23 +2675,25 @@ static int alloc_cwqs(struct workqueue_struct *wq) const size_t size = sizeof(struct cpu_workqueue_struct); const size_t align = max_t(size_t, 1 << WORK_STRUCT_FLAG_BITS, __alignof__(unsigned long long)); -#ifndef CONFIG_SMP - void *ptr; - /* - * Allocate enough room to align cwq and put an extra pointer - * at the end pointing back to the originally allocated - * pointer which will be used for free. - */ - ptr = kzalloc(size + align + sizeof(void *), GFP_KERNEL); - if (ptr) { - wq->cpu_wq.single = PTR_ALIGN(ptr, align); - *(void **)(wq->cpu_wq.single + 1) = ptr; + if (CONFIG_SMP && !(wq->flags & WQ_UNBOUND)) { + /* on SMP, percpu allocator can align itself */ + wq->cpu_wq.pcpu = __alloc_percpu(size, align); + } else { + void *ptr; + + /* + * Allocate enough room to align cwq and put an extra + * pointer at the end pointing back to the originally + * allocated pointer which will be used for free. + */ + ptr = kzalloc(size + align + sizeof(void *), GFP_KERNEL); + if (ptr) { + wq->cpu_wq.single = PTR_ALIGN(ptr, align); + *(void **)(wq->cpu_wq.single + 1) = ptr; + } } -#else - /* On SMP, percpu allocator can align itself */ - wq->cpu_wq.pcpu = __alloc_percpu(size, align); -#endif + /* just in case, make sure it's actually aligned */ BUG_ON(!IS_ALIGNED(wq->cpu_wq.v, align)); return wq->cpu_wq.v ? 
0 : -ENOMEM; @@ -2614,23 +2701,25 @@ static int alloc_cwqs(struct workqueue_struct *wq) static void free_cwqs(struct workqueue_struct *wq) { -#ifndef CONFIG_SMP - /* on UP, the pointer to free is stored right after the cwq */ - if (wq->cpu_wq.single) + if (CONFIG_SMP && !(wq->flags & WQ_UNBOUND)) + free_percpu(wq->cpu_wq.pcpu); + else if (wq->cpu_wq.single) { + /* the pointer to free is stored right after the cwq */ kfree(*(void **)(wq->cpu_wq.single + 1)); -#else - free_percpu(wq->cpu_wq.pcpu); -#endif + } } -static int wq_clamp_max_active(int max_active, const char *name) +static int wq_clamp_max_active(int max_active, unsigned int flags, + const char *name) { - if (max_active < 1 || max_active > WQ_MAX_ACTIVE) + int lim = flags & WQ_UNBOUND ? WQ_UNBOUND_MAX_ACTIVE : WQ_MAX_ACTIVE; + + if (max_active < 1 || max_active > lim) printk(KERN_WARNING "workqueue: max_active %d requested for %s " "is out of range, clamping between %d and %d\n", - max_active, name, 1, WQ_MAX_ACTIVE); + max_active, name, 1, lim); - return clamp_val(max_active, 1, WQ_MAX_ACTIVE); + return clamp_val(max_active, 1, lim); } struct workqueue_struct *__alloc_workqueue_key(const char *name, @@ -2642,8 +2731,15 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name, struct workqueue_struct *wq; unsigned int cpu; + /* + * Unbound workqueues aren't concurrency managed and should be + * dispatched to workers immediately. + */ + if (flags & WQ_UNBOUND) + flags |= WQ_HIGHPRI; + max_active = max_active ?: WQ_DFL_ACTIVE; - max_active = wq_clamp_max_active(max_active, name); + max_active = wq_clamp_max_active(max_active, flags, name); wq = kzalloc(sizeof(*wq), GFP_KERNEL); if (!wq) @@ -2664,7 +2760,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name, if (alloc_cwqs(wq) < 0) goto err; - for_each_possible_cpu(cpu) { + for_each_cwq_cpu(cpu, wq) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); struct global_cwq *gcwq = get_gcwq(cpu); @@ -2703,7 +2799,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name, spin_lock(&workqueue_lock); if (workqueue_freezing && wq->flags & WQ_FREEZEABLE) - for_each_possible_cpu(cpu) + for_each_cwq_cpu(cpu, wq) get_cwq(cpu, wq)->max_active = 0; list_add(&wq->list, &workqueues); @@ -2743,7 +2839,7 @@ void destroy_workqueue(struct workqueue_struct *wq) spin_unlock(&workqueue_lock); /* sanity check */ - for_each_possible_cpu(cpu) { + for_each_cwq_cpu(cpu, wq) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); int i; @@ -2777,13 +2873,13 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) { unsigned int cpu; - max_active = wq_clamp_max_active(max_active, wq->name); + max_active = wq_clamp_max_active(max_active, wq->flags, wq->name); spin_lock(&workqueue_lock); wq->saved_max_active = max_active; - for_each_possible_cpu(cpu) { + for_each_cwq_cpu(cpu, wq) { struct global_cwq *gcwq = get_gcwq(cpu); spin_lock_irq(&gcwq->lock); @@ -3310,7 +3406,7 @@ void freeze_workqueues_begin(void) BUG_ON(workqueue_freezing); workqueue_freezing = true; - for_each_possible_cpu(cpu) { + for_each_gcwq_cpu(cpu) { struct global_cwq *gcwq = get_gcwq(cpu); struct workqueue_struct *wq; @@ -3322,7 +3418,7 @@ void freeze_workqueues_begin(void) list_for_each_entry(wq, &workqueues, list) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); - if (wq->flags & WQ_FREEZEABLE) + if (cwq && wq->flags & WQ_FREEZEABLE) cwq->max_active = 0; } @@ -3354,7 +3450,7 @@ bool freeze_workqueues_busy(void) BUG_ON(!workqueue_freezing); - for_each_possible_cpu(cpu) { + 
for_each_gcwq_cpu(cpu) { struct workqueue_struct *wq; /* * nr_active is monotonically decreasing. It's safe @@ -3363,7 +3459,7 @@ bool freeze_workqueues_busy(void) list_for_each_entry(wq, &workqueues, list) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); - if (!(wq->flags & WQ_FREEZEABLE)) + if (!cwq || !(wq->flags & WQ_FREEZEABLE)) continue; BUG_ON(cwq->nr_active < 0); @@ -3396,7 +3492,7 @@ void thaw_workqueues(void) if (!workqueue_freezing) goto out_unlock; - for_each_possible_cpu(cpu) { + for_each_gcwq_cpu(cpu) { struct global_cwq *gcwq = get_gcwq(cpu); struct workqueue_struct *wq; @@ -3408,7 +3504,7 @@ void thaw_workqueues(void) list_for_each_entry(wq, &workqueues, list) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); - if (!(wq->flags & WQ_FREEZEABLE)) + if (!cwq || !(wq->flags & WQ_FREEZEABLE)) continue; /* restore max_active and repopulate worklist */ @@ -3451,12 +3547,14 @@ void __init init_workqueues(void) hotcpu_notifier(workqueue_cpu_callback, CPU_PRI_WORKQUEUE); /* initialize gcwqs */ - for_each_possible_cpu(cpu) { + for_each_gcwq_cpu(cpu) { struct global_cwq *gcwq = get_gcwq(cpu); spin_lock_init(&gcwq->lock); INIT_LIST_HEAD(&gcwq->worklist); gcwq->cpu = cpu; + if (cpu == WORK_CPU_UNBOUND) + gcwq->flags |= GCWQ_DISASSOCIATED; INIT_LIST_HEAD(&gcwq->idle_list); for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++) @@ -3476,7 +3574,7 @@ void __init init_workqueues(void) } /* create the initial worker */ - for_each_online_cpu(cpu) { + for_each_online_gcwq_cpu(cpu) { struct global_cwq *gcwq = get_gcwq(cpu); struct worker *worker; @@ -3490,5 +3588,7 @@ void __init init_workqueues(void) system_wq = alloc_workqueue("events", 0, 0); system_long_wq = alloc_workqueue("events_long", 0, 0); system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0); + system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, + WQ_UNBOUND_MAX_ACTIVE); BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq); } -- cgit v1.2.3 From c7fc77f78f16d138ca997ce096a62f46e2e9420a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 2 Jul 2010 10:03:51 +0200 Subject: workqueue: remove WQ_SINGLE_CPU and use WQ_UNBOUND instead WQ_SINGLE_CPU combined with @max_active of 1 is used to achieve full ordering among works queued to a workqueue. The same can be achieved using WQ_UNBOUND as unbound workqueues always use the gcwq for WORK_CPU_UNBOUND. As @max_active is always one and benefits from cpu locality isn't accessible anyway, serving them with unbound workqueues should be fine. Drop WQ_SINGLE_CPU support and use WQ_UNBOUND instead. Note that most single thread workqueue users will be converted to use multithread or non-reentrant instead and only the ones which require strict ordering will keep using WQ_UNBOUND + @max_active of 1. 
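As an illustration of the ordering recipe described above (not part of this patch; the example_* names are hypothetical), an unbound workqueue with @max_active of 1 gives the old single thread semantics:

	#include <linux/errno.h>
	#include <linux/workqueue.h>

	static void example_fn(struct work_struct *work) { }
	static DECLARE_WORK(example_work_a, example_fn);
	static DECLARE_WORK(example_work_b, example_fn);

	static int example_init(void)
	{
		struct workqueue_struct *wq;

		/* one work item at a time, in queueing order */
		wq = alloc_workqueue("example_ordered", WQ_UNBOUND, 1);
		if (!wq)
			return -ENOMEM;

		/* example_work_a is guaranteed to finish before _b starts */
		queue_work(wq, &example_work_a);
		queue_work(wq, &example_work_b);
		return 0;
	}

A caller that may be involved in memory reclaim would also pass WQ_RESCUER, which is what the converted create_singlethread_workqueue() above does.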
Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 7 ++-- kernel/workqueue.c | 100 +++++++++------------------------------------- 2 files changed, 21 insertions(+), 86 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 67ce734747f6..d74a529ed13e 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -233,12 +233,11 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } enum { WQ_NON_REENTRANT = 1 << 0, /* guarantee non-reentrance */ - WQ_SINGLE_CPU = 1 << 1, /* only single cpu at a time */ + WQ_UNBOUND = 1 << 1, /* not bound to any cpu */ WQ_FREEZEABLE = 1 << 2, /* freeze during suspend */ WQ_RESCUER = 1 << 3, /* has an rescue worker */ WQ_HIGHPRI = 1 << 4, /* high priority */ WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */ - WQ_UNBOUND = 1 << 6, /* not bound to any cpu */ WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ WQ_MAX_UNBOUND_PER_CPU = 4, /* 4 * #cpus for unbound wq */ @@ -300,9 +299,9 @@ __alloc_workqueue_key(const char *name, unsigned int flags, int max_active, #define create_workqueue(name) \ alloc_workqueue((name), WQ_RESCUER, 1) #define create_freezeable_workqueue(name) \ - alloc_workqueue((name), WQ_FREEZEABLE | WQ_SINGLE_CPU | WQ_RESCUER, 1) + alloc_workqueue((name), WQ_FREEZEABLE | WQ_UNBOUND | WQ_RESCUER, 1) #define create_singlethread_workqueue(name) \ - alloc_workqueue((name), WQ_SINGLE_CPU | WQ_RESCUER, 1) + alloc_workqueue((name), WQ_UNBOUND | WQ_RESCUER, 1) extern void destroy_workqueue(struct workqueue_struct *wq); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 4608563cdd63..20d6237d7498 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -206,8 +206,6 @@ struct workqueue_struct { struct list_head flusher_queue; /* F: flush waiters */ struct list_head flusher_overflow; /* F: flush overflow list */ - unsigned long single_cpu; /* cpu for single cpu wq */ - cpumask_var_t mayday_mask; /* cpus requesting rescue */ struct worker *rescuer; /* I: rescue worker */ @@ -889,34 +887,6 @@ static void insert_work(struct cpu_workqueue_struct *cwq, wake_up_worker(gcwq); } -/** - * cwq_unbind_single_cpu - unbind cwq from single cpu workqueue processing - * @cwq: cwq to unbind - * - * Try to unbind @cwq from single cpu workqueue processing. If - * @cwq->wq is frozen, unbind is delayed till the workqueue is thawed. - * - * CONTEXT: - * spin_lock_irq(gcwq->lock). - */ -static void cwq_unbind_single_cpu(struct cpu_workqueue_struct *cwq) -{ - struct workqueue_struct *wq = cwq->wq; - struct global_cwq *gcwq = cwq->gcwq; - - BUG_ON(wq->single_cpu != gcwq->cpu); - /* - * Unbind from workqueue if @cwq is not frozen. If frozen, - * thaw_workqueues() will either restart processing on this - * cpu or unbind if empty. This keeps works queued while - * frozen fully ordered and flushable. - */ - if (likely(!(gcwq->flags & GCWQ_FREEZING))) { - smp_wmb(); /* paired with cmpxchg() in __queue_work() */ - wq->single_cpu = WORK_CPU_NONE; - } -} - static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, struct work_struct *work) { @@ -924,20 +894,16 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, struct cpu_workqueue_struct *cwq; struct list_head *worklist; unsigned long flags; - bool arbitrate; debug_work_activate(work); - if (unlikely(cpu == WORK_CPU_UNBOUND)) - cpu = raw_smp_processor_id(); - - /* - * Determine gcwq to use. SINGLE_CPU is inherently - * NON_REENTRANT, so test it first. 
- */ - if (!(wq->flags & (WQ_SINGLE_CPU | WQ_UNBOUND))) { + /* determine gcwq to use */ + if (!(wq->flags & WQ_UNBOUND)) { struct global_cwq *last_gcwq; + if (unlikely(cpu == WORK_CPU_UNBOUND)) + cpu = raw_smp_processor_id(); + /* * It's multi cpu. If @wq is non-reentrant and @work * was previously on a different cpu, it might still @@ -962,38 +928,6 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, } } else spin_lock_irqsave(&gcwq->lock, flags); - } else if (!(wq->flags & WQ_UNBOUND)) { - unsigned int req_cpu = cpu; - - /* - * It's a bit more complex for single cpu workqueues. - * We first need to determine which cpu is going to be - * used. If no cpu is currently serving this - * workqueue, arbitrate using atomic accesses to - * wq->single_cpu; otherwise, use the current one. - */ - retry: - cpu = wq->single_cpu; - arbitrate = cpu == WORK_CPU_NONE; - if (arbitrate) - cpu = req_cpu; - - gcwq = get_gcwq(cpu); - spin_lock_irqsave(&gcwq->lock, flags); - - /* - * The following cmpxchg() is a full barrier paired - * with smp_wmb() in cwq_unbind_single_cpu() and - * guarantees that all changes to wq->st_* fields are - * visible on the new cpu after this point. - */ - if (arbitrate) - cmpxchg(&wq->single_cpu, WORK_CPU_NONE, cpu); - - if (unlikely(wq->single_cpu != cpu)) { - spin_unlock_irqrestore(&gcwq->lock, flags); - goto retry; - } } else { gcwq = get_gcwq(WORK_CPU_UNBOUND); spin_lock_irqsave(&gcwq->lock, flags); @@ -1105,19 +1039,30 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work = &dwork->work; if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) { - struct global_cwq *gcwq = get_work_gcwq(work); - unsigned int lcpu = gcwq ? gcwq->cpu : raw_smp_processor_id(); + unsigned int lcpu; BUG_ON(timer_pending(timer)); BUG_ON(!list_empty(&work->entry)); timer_stats_timer_set_start_info(&dwork->timer); + /* * This stores cwq for the moment, for the timer_fn. * Note that the work's gcwq is preserved to allow * reentrance detection for delayed works. */ + if (!(wq->flags & WQ_UNBOUND)) { + struct global_cwq *gcwq = get_work_gcwq(work); + + if (gcwq && gcwq->cpu != WORK_CPU_UNBOUND) + lcpu = gcwq->cpu; + else + lcpu = raw_smp_processor_id(); + } else + lcpu = WORK_CPU_UNBOUND; + set_work_cwq(work, get_cwq(lcpu, wq), 0); + timer->expires = jiffies + delay; timer->data = (unsigned long)dwork; timer->function = delayed_work_timer_fn; @@ -1696,9 +1641,6 @@ static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color) /* one down, submit a delayed one */ if (cwq->nr_active < cwq->max_active) cwq_activate_first_delayed(cwq); - } else if (!cwq->nr_active && cwq->wq->flags & WQ_SINGLE_CPU) { - /* this was the last work, unbind from single cpu */ - cwq_unbind_single_cpu(cwq); } /* is flush in progress and are we at the flushing tip? 
*/ @@ -2751,7 +2693,6 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name, atomic_set(&wq->nr_cwqs_to_flush, 0); INIT_LIST_HEAD(&wq->flusher_queue); INIT_LIST_HEAD(&wq->flusher_overflow); - wq->single_cpu = WORK_CPU_NONE; wq->name = name; lockdep_init_map(&wq->lockdep_map, lock_name, key, 0); @@ -3513,11 +3454,6 @@ void thaw_workqueues(void) while (!list_empty(&cwq->delayed_works) && cwq->nr_active < cwq->max_active) cwq_activate_first_delayed(cwq); - - /* perform delayed unbind from single cpu if empty */ - if (wq->single_cpu == gcwq->cpu && - !cwq->nr_active && list_empty(&cwq->delayed_works)) - cwq_unbind_single_cpu(cwq); } wake_up_worker(gcwq); -- cgit v1.2.3 From e120153ddf8620fd0a194d301e9c5a8b28483bb5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 22 Jul 2010 14:14:25 +0200 Subject: workqueue: fix how cpu number is stored in work->data Once a work starts execution, its data contains the cpu number it was on instead of pointing to cwq. This is added by commit 7a22ad75 (workqueue: carry cpu number in work data once execution starts) to reliably determine the work was last on even if the workqueue itself was destroyed inbetween. Whether data points to a cwq or contains a cpu number was distinguished by comparing the value against PAGE_OFFSET. The assumption was that a cpu number should be below PAGE_OFFSET while a pointer to cwq should be above it. However, on architectures which use separate address spaces for user and kernel spaces, this doesn't hold as PAGE_OFFSET is zero. Fix it by using an explicit flag, WORK_STRUCT_CWQ, to mark what the data field contains. If the flag is set, it's pointing to a cwq; otherwise, it contains a cpu number. Reported on s390 and microblaze during linux-next testing. Signed-off-by: Tejun Heo Reported-by: Sachin Sant Reported-by: Michal Simek Reported-by: Martin Schwidefsky Tested-by: Martin Schwidefsky Tested-by: Michal Simek --- include/linux/workqueue.h | 14 ++++++++------ kernel/workqueue.c | 36 +++++++++++++----------------------- 2 files changed, 21 insertions(+), 29 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index d74a529ed13e..5f76001c4e6d 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -25,17 +25,19 @@ typedef void (*work_func_t)(struct work_struct *work); enum { WORK_STRUCT_PENDING_BIT = 0, /* work item is pending execution */ - WORK_STRUCT_LINKED_BIT = 1, /* next work is linked to this one */ + WORK_STRUCT_CWQ_BIT = 1, /* data points to cwq */ + WORK_STRUCT_LINKED_BIT = 2, /* next work is linked to this one */ #ifdef CONFIG_DEBUG_OBJECTS_WORK - WORK_STRUCT_STATIC_BIT = 2, /* static initializer (debugobjects) */ - WORK_STRUCT_COLOR_SHIFT = 3, /* color for workqueue flushing */ + WORK_STRUCT_STATIC_BIT = 3, /* static initializer (debugobjects) */ + WORK_STRUCT_COLOR_SHIFT = 4, /* color for workqueue flushing */ #else - WORK_STRUCT_COLOR_SHIFT = 2, /* color for workqueue flushing */ + WORK_STRUCT_COLOR_SHIFT = 3, /* color for workqueue flushing */ #endif WORK_STRUCT_COLOR_BITS = 4, WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT, + WORK_STRUCT_CWQ = 1 << WORK_STRUCT_CWQ_BIT, WORK_STRUCT_LINKED = 1 << WORK_STRUCT_LINKED_BIT, #ifdef CONFIG_DEBUG_OBJECTS_WORK WORK_STRUCT_STATIC = 1 << WORK_STRUCT_STATIC_BIT, @@ -56,8 +58,8 @@ enum { WORK_CPU_LAST = WORK_CPU_NONE, /* - * Reserve 6 bits off of cwq pointer w/ debugobjects turned - * off. 
This makes cwqs aligned to 64 bytes which isn't too + * Reserve 7 bits off of cwq pointer w/ debugobjects turned + * off. This makes cwqs aligned to 128 bytes which isn't too * excessive while allowing 15 workqueue flush colors. */ WORK_STRUCT_FLAG_BITS = WORK_STRUCT_COLOR_SHIFT + diff --git a/kernel/workqueue.c b/kernel/workqueue.c index c11edc9c9365..e5cb7faac58e 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -468,10 +468,9 @@ static int work_next_color(int color) } /* - * Work data points to the cwq while a work is on queue. Once - * execution starts, it points to the cpu the work was last on. This - * can be distinguished by comparing the data value against - * PAGE_OFFSET. + * A work's data points to the cwq with WORK_STRUCT_CWQ set while the + * work is on queue. Once execution starts, WORK_STRUCT_CWQ is + * cleared and the work data contains the cpu number it was last on. * * set_work_{cwq|cpu}() and clear_work_data() can be used to set the * cwq, cpu or clear work->data. These functions should only be @@ -494,7 +493,7 @@ static void set_work_cwq(struct work_struct *work, unsigned long extra_flags) { set_work_data(work, (unsigned long)cwq, - WORK_STRUCT_PENDING | extra_flags); + WORK_STRUCT_PENDING | WORK_STRUCT_CWQ | extra_flags); } static void set_work_cpu(struct work_struct *work, unsigned int cpu) @@ -507,25 +506,24 @@ static void clear_work_data(struct work_struct *work) set_work_data(work, WORK_STRUCT_NO_CPU, 0); } -static inline unsigned long get_work_data(struct work_struct *work) -{ - return atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK; -} - static struct cpu_workqueue_struct *get_work_cwq(struct work_struct *work) { - unsigned long data = get_work_data(work); + unsigned long data = atomic_long_read(&work->data); - return data >= PAGE_OFFSET ? (void *)data : NULL; + if (data & WORK_STRUCT_CWQ) + return (void *)(data & WORK_STRUCT_WQ_DATA_MASK); + else + return NULL; } static struct global_cwq *get_work_gcwq(struct work_struct *work) { - unsigned long data = get_work_data(work); + unsigned long data = atomic_long_read(&work->data); unsigned int cpu; - if (data >= PAGE_OFFSET) - return ((struct cpu_workqueue_struct *)data)->gcwq; + if (data & WORK_STRUCT_CWQ) + return ((struct cpu_workqueue_struct *) + (data & WORK_STRUCT_WQ_DATA_MASK))->gcwq; cpu = data >> WORK_STRUCT_FLAG_BITS; if (cpu == WORK_CPU_NONE) @@ -3501,14 +3499,6 @@ void __init init_workqueues(void) unsigned int cpu; int i; - /* - * The pointer part of work->data is either pointing to the - * cwq or contains the cpu number the work ran last on. Make - * sure cpu number won't overflow into kernel pointer area so - * that they can be distinguished. - */ - BUILD_BUG_ON(WORK_CPU_LAST << WORK_STRUCT_FLAG_BITS >= PAGE_OFFSET); - hotcpu_notifier(workqueue_cpu_callback, CPU_PRI_WORKQUEUE); /* initialize gcwqs */ -- cgit v1.2.3 From 6ee0578b4daaea01c96b172c6aacca43fd9807a6 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 30 Jul 2010 14:57:37 -0700 Subject: workqueue: mark init_workqueues() as early_initcall() Mark init_workqueues() as early_initcall() and thus it will be initialized before smp bringup. init_workqueues() registers for the hotcpu notifier and thus it should cope with the processors that are brought online after the workqueues are initialized. x86 smp bringup code uses workqueues and uses a workaround for the cold boot process (as the workqueues are initialized post smp_init()). Marking init_workqueues() as early_initcall() will pave the way for cleaning up this code. 
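For reference, a minimal sketch of the early_initcall() mechanism this patch switches to (example_early_setup() is hypothetical, not part of this patch):

	#include <linux/init.h>

	static int __init example_early_setup(void)
	{
		/*
		 * early_initcall()s run from do_pre_smp_initcalls(),
		 * i.e. before smp_init() brings up secondary CPUs,
		 * while regular initcalls run later from do_initcalls().
		 */
		return 0;
	}
	early_initcall(example_early_setup);

With init_workqueues() registered this way, workqueues are available before SMP bringup and the hotcpu notifier handles CPUs that come online afterwards.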
Signed-off-by: Suresh Siddha Signed-off-by: Tejun Heo Cc: Oleg Nesterov Cc: Andrew Morton --- include/linux/workqueue.h | 1 - init/main.c | 2 -- kernel/workqueue.c | 4 +++- 3 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 5f76001c4e6d..51dc9a727e5e 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -327,7 +327,6 @@ extern int schedule_delayed_work_on(int cpu, struct delayed_work *work, extern int schedule_on_each_cpu(work_func_t func); extern int keventd_up(void); -extern void init_workqueues(void); int execute_in_process_context(work_func_t fn, struct execute_work *); extern int flush_work(struct work_struct *work); diff --git a/init/main.c b/init/main.c index 3bdb152f412f..5f2ec2cdd900 100644 --- a/init/main.c +++ b/init/main.c @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include @@ -786,7 +785,6 @@ static void __init do_initcalls(void) */ static void __init do_basic_setup(void) { - init_workqueues(); cpuset_init_smp(); usermodehelper_init(); init_tmpfs(); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 1105c474073a..e2eb351d9152 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3507,7 +3507,7 @@ out_unlock: } #endif /* CONFIG_FREEZER */ -void __init init_workqueues(void) +static int __init init_workqueues(void) { unsigned int cpu; int i; @@ -3559,4 +3559,6 @@ void __init init_workqueues(void) system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, WQ_UNBOUND_MAX_ACTIVE); BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq); + return 0; } +early_initcall(init_workqueues); -- cgit v1.2.3 From e41e704bc4f49057fc68b643108366e6e6781aa3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 24 Aug 2010 14:22:47 +0200 Subject: workqueue: improve destroy_workqueue() debuggability Now that the worklist is global, having works pending after wq destruction can easily lead to oops and destroy_workqueue() have several BUG_ON()s to catch these cases. Unfortunately, BUG_ON() doesn't tell much about how the work became pending after the final flush_workqueue(). This patch adds WQ_DYING which is set before the final flush begins. If a work is requested to be queued on a dying workqueue, WARN_ON_ONCE() is triggered and the request is ignored. This clearly indicates which caller is trying to queue a work on a dying workqueue and keeps the system working in most cases. Locking rule comment is updated such that the 'I' rule includes modifying the field from destruction path. Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 2 ++ kernel/workqueue.c | 7 ++++++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 4f9d277bcd9a..c959666eafca 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -241,6 +241,8 @@ enum { WQ_HIGHPRI = 1 << 4, /* high priority */ WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */ + WQ_DYING = 1 << 6, /* internal: workqueue is dying */ + WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ WQ_MAX_UNBOUND_PER_CPU = 4, /* 4 * #cpus for unbound wq */ WQ_DFL_ACTIVE = WQ_MAX_ACTIVE / 2, diff --git a/kernel/workqueue.c b/kernel/workqueue.c index cc3456f96c56..362b50d092e2 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -87,7 +87,8 @@ enum { /* * Structure fields follow one of the following exclusion rules. 
* - * I: Set during initialization and read-only afterwards. + * I: Modifiable by initialization/destruction paths and read-only for + * everyone else. * * P: Preemption protected. Disabling preemption is enough and should * only be modified and accessed from the local cpu. @@ -944,6 +945,9 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, debug_work_activate(work); + if (WARN_ON_ONCE(wq->flags & WQ_DYING)) + return; + /* determine gcwq to use */ if (!(wq->flags & WQ_UNBOUND)) { struct global_cwq *last_gcwq; @@ -2828,6 +2832,7 @@ void destroy_workqueue(struct workqueue_struct *wq) { unsigned int cpu; + wq->flags |= WQ_DYING; flush_workqueue(wq); /* -- cgit v1.2.3 From 8a2e8e5dec7e29c56a46ba176c664ab6a3d04118 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 25 Aug 2010 10:33:56 +0200 Subject: workqueue: fix cwq->nr_active underflow cwq->nr_active is used to keep track of how many work items are active for the cpu workqueue, where 'active' is defined as either pending on global worklist or executing. This is used to implement the max_active limit and workqueue freezing. If a work item is queued after nr_active has already reached max_active, the work item doesn't increment nr_active and is put on the delayed queue and gets activated later as previous active work items retire. try_to_grab_pending() which is used in the cancellation path unconditionally decremented nr_active whether the work item being cancelled is currently active or delayed, so cancelling a delayed work item makes nr_active underflow. This breaks max_active enforcement and triggers BUG_ON() in destroy_workqueue() later on. This patch fixes this bug by adding a flag WORK_STRUCT_DELAYED, which is set while a work item in on the delayed list and making try_to_grab_pending() decrement nr_active iff the work item is currently active. The addition of the flag enlarges cwq alignment to 256 bytes which is getting a bit too large. It's scheduled to be reduced back to 128 bytes by merging WORK_STRUCT_PENDING and WORK_STRUCT_CWQ in the next devel cycle. 
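A minimal sketch of the underflow scenario this patch addresses (the names w1, w2 and example_trigger() are hypothetical, not part of this patch):

	#include <linux/workqueue.h>

	static void example_fn(struct work_struct *work) { }
	static DECLARE_WORK(w1, example_fn);
	static DECLARE_WORK(w2, example_fn);

	/* @wq was allocated with @max_active of 1 */
	static void example_trigger(struct workqueue_struct *wq)
	{
		queue_work(wq, &w1);	/* becomes active, nr_active = 1        */
		queue_work(wq, &w2);	/* over the limit, put on delayed_works */
		cancel_work_sync(&w2);	/* must leave nr_active untouched       */
	}

Before the fix, cancelling w2 decremented nr_active although w2 never became active; WORK_STRUCT_DELAYED lets try_to_grab_pending() distinguish the two cases.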
Signed-off-by: Tejun Heo Reported-by: Johannes Berg --- include/linux/workqueue.h | 16 +++++++++------- kernel/workqueue.c | 30 ++++++++++++++++++++---------- 2 files changed, 29 insertions(+), 17 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index c959666eafca..f11100f96482 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -25,18 +25,20 @@ typedef void (*work_func_t)(struct work_struct *work); enum { WORK_STRUCT_PENDING_BIT = 0, /* work item is pending execution */ - WORK_STRUCT_CWQ_BIT = 1, /* data points to cwq */ - WORK_STRUCT_LINKED_BIT = 2, /* next work is linked to this one */ + WORK_STRUCT_DELAYED_BIT = 1, /* work item is delayed */ + WORK_STRUCT_CWQ_BIT = 2, /* data points to cwq */ + WORK_STRUCT_LINKED_BIT = 3, /* next work is linked to this one */ #ifdef CONFIG_DEBUG_OBJECTS_WORK - WORK_STRUCT_STATIC_BIT = 3, /* static initializer (debugobjects) */ - WORK_STRUCT_COLOR_SHIFT = 4, /* color for workqueue flushing */ + WORK_STRUCT_STATIC_BIT = 4, /* static initializer (debugobjects) */ + WORK_STRUCT_COLOR_SHIFT = 5, /* color for workqueue flushing */ #else - WORK_STRUCT_COLOR_SHIFT = 3, /* color for workqueue flushing */ + WORK_STRUCT_COLOR_SHIFT = 4, /* color for workqueue flushing */ #endif WORK_STRUCT_COLOR_BITS = 4, WORK_STRUCT_PENDING = 1 << WORK_STRUCT_PENDING_BIT, + WORK_STRUCT_DELAYED = 1 << WORK_STRUCT_DELAYED_BIT, WORK_STRUCT_CWQ = 1 << WORK_STRUCT_CWQ_BIT, WORK_STRUCT_LINKED = 1 << WORK_STRUCT_LINKED_BIT, #ifdef CONFIG_DEBUG_OBJECTS_WORK @@ -59,8 +61,8 @@ enum { /* * Reserve 7 bits off of cwq pointer w/ debugobjects turned - * off. This makes cwqs aligned to 128 bytes which isn't too - * excessive while allowing 15 workqueue flush colors. + * off. This makes cwqs aligned to 256 bytes and allows 15 + * workqueue flush colors. 
*/ WORK_STRUCT_FLAG_BITS = WORK_STRUCT_COLOR_SHIFT + WORK_STRUCT_COLOR_BITS, diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 362b50d092e2..a2dccfca03ba 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -941,6 +941,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, struct global_cwq *gcwq; struct cpu_workqueue_struct *cwq; struct list_head *worklist; + unsigned int work_flags; unsigned long flags; debug_work_activate(work); @@ -990,14 +991,17 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, BUG_ON(!list_empty(&work->entry)); cwq->nr_in_flight[cwq->work_color]++; + work_flags = work_color_to_flags(cwq->work_color); if (likely(cwq->nr_active < cwq->max_active)) { cwq->nr_active++; worklist = gcwq_determine_ins_pos(gcwq, cwq); - } else + } else { + work_flags |= WORK_STRUCT_DELAYED; worklist = &cwq->delayed_works; + } - insert_work(cwq, work, worklist, work_color_to_flags(cwq->work_color)); + insert_work(cwq, work, worklist, work_flags); spin_unlock_irqrestore(&gcwq->lock, flags); } @@ -1666,6 +1670,7 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) struct list_head *pos = gcwq_determine_ins_pos(cwq->gcwq, cwq); move_linked_works(work, pos, NULL); + __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work)); cwq->nr_active++; } @@ -1673,6 +1678,7 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight * @cwq: cwq of interest * @color: color of work which left the queue + * @delayed: for a delayed work * * A work either has completed or is removed from pending queue, * decrement nr_in_flight of its cwq and handle workqueue flushing. @@ -1680,19 +1686,22 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) * CONTEXT: * spin_lock_irq(gcwq->lock). */ -static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color) +static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color, + bool delayed) { /* ignore uncolored works */ if (color == WORK_NO_COLOR) return; cwq->nr_in_flight[color]--; - cwq->nr_active--; - if (!list_empty(&cwq->delayed_works)) { - /* one down, submit a delayed one */ - if (cwq->nr_active < cwq->max_active) - cwq_activate_first_delayed(cwq); + if (!delayed) { + cwq->nr_active--; + if (!list_empty(&cwq->delayed_works)) { + /* one down, submit a delayed one */ + if (cwq->nr_active < cwq->max_active) + cwq_activate_first_delayed(cwq); + } } /* is flush in progress and are we at the flushing tip? */ @@ -1823,7 +1832,7 @@ __acquires(&gcwq->lock) hlist_del_init(&worker->hentry); worker->current_work = NULL; worker->current_cwq = NULL; - cwq_dec_nr_in_flight(cwq, work_color); + cwq_dec_nr_in_flight(cwq, work_color, false); } /** @@ -2388,7 +2397,8 @@ static int try_to_grab_pending(struct work_struct *work) debug_work_deactivate(work); list_del_init(&work->entry); cwq_dec_nr_in_flight(get_work_cwq(work), - get_work_color(work)); + get_work_color(work), + *work_data_bits(work) & WORK_STRUCT_DELAYED); ret = 1; } } -- cgit v1.2.3 From c54fce6eff197d9c57c97afbf6c9722ce434fc8f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 10 Sep 2010 16:51:36 +0200 Subject: workqueue: add documentation Update copyright notice and add Documentation/workqueue.txt. Randy Dunlap, Dave Chinner: misc fixes. 
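A minimal usage sketch in the style the new document describes (the names and flag choice are illustrative only, not part of this patch):

	#include <linux/errno.h>
	#include <linux/workqueue.h>

	static struct workqueue_struct *example_wq;

	static int example_setup(void)
	{
		/* @max_active of 0 selects the default (WQ_DFL_ACTIVE) */
		example_wq = alloc_workqueue("example", WQ_NON_REENTRANT, 0);
		return example_wq ? 0 : -ENOMEM;
	}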
Signed-off-by: Tejun Heo Reviewed-By: Florian Mickler Cc: Ingo Molnar Cc: Christoph Lameter Cc: Randy Dunlap Cc: Dave Chinner --- Documentation/workqueue.txt | 380 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/workqueue.h | 4 + kernel/workqueue.c | 27 ++-- 3 files changed, 401 insertions(+), 10 deletions(-) create mode 100644 Documentation/workqueue.txt (limited to 'include/linux/workqueue.h') diff --git a/Documentation/workqueue.txt b/Documentation/workqueue.txt new file mode 100644 index 000000000000..e4498a2872c3 --- /dev/null +++ b/Documentation/workqueue.txt @@ -0,0 +1,380 @@ + +Concurrency Managed Workqueue (cmwq) + +September, 2010 Tejun Heo + Florian Mickler + +CONTENTS + +1. Introduction +2. Why cmwq? +3. The Design +4. Application Programming Interface (API) +5. Example Execution Scenarios +6. Guidelines + + +1. Introduction + +There are many cases where an asynchronous process execution context +is needed and the workqueue (wq) API is the most commonly used +mechanism for such cases. + +When such an asynchronous execution context is needed, a work item +describing which function to execute is put on a queue. An +independent thread serves as the asynchronous execution context. The +queue is called workqueue and the thread is called worker. + +While there are work items on the workqueue the worker executes the +functions associated with the work items one after the other. When +there is no work item left on the workqueue the worker becomes idle. +When a new work item gets queued, the worker begins executing again. + + +2. Why cmwq? + +In the original wq implementation, a multi threaded (MT) wq had one +worker thread per CPU and a single threaded (ST) wq had one worker +thread system-wide. A single MT wq needed to keep around the same +number of workers as the number of CPUs. The kernel grew a lot of MT +wq users over the years and with the number of CPU cores continuously +rising, some systems saturated the default 32k PID space just booting +up. + +Although MT wq wasted a lot of resource, the level of concurrency +provided was unsatisfactory. The limitation was common to both ST and +MT wq albeit less severe on MT. Each wq maintained its own separate +worker pool. A MT wq could provide only one execution context per CPU +while a ST wq one for the whole system. Work items had to compete for +those very limited execution contexts leading to various problems +including proneness to deadlocks around the single execution context. + +The tension between the provided level of concurrency and resource +usage also forced its users to make unnecessary tradeoffs like libata +choosing to use ST wq for polling PIOs and accepting an unnecessary +limitation that no two polling PIOs can progress at the same time. As +MT wq don't provide much better concurrency, users which require +higher level of concurrency, like async or fscache, had to implement +their own thread pool. + +Concurrency Managed Workqueue (cmwq) is a reimplementation of wq with +focus on the following goals. + +* Maintain compatibility with the original workqueue API. + +* Use per-CPU unified worker pools shared by all wq to provide + flexible level of concurrency on demand without wasting a lot of + resource. + +* Automatically regulate worker pool and level of concurrency so that + the API users don't need to worry about such details. + + +3. The Design + +In order to ease the asynchronous execution of functions a new +abstraction, the work item, is introduced. 
+ +A work item is a simple struct that holds a pointer to the function +that is to be executed asynchronously. Whenever a driver or subsystem +wants a function to be executed asynchronously it has to set up a work +item pointing to that function and queue that work item on a +workqueue. + +Special purpose threads, called worker threads, execute the functions +off of the queue, one after the other. If no work is queued, the +worker threads become idle. These worker threads are managed in so +called thread-pools. + +The cmwq design differentiates between the user-facing workqueues that +subsystems and drivers queue work items on and the backend mechanism +which manages thread-pool and processes the queued work items. + +The backend is called gcwq. There is one gcwq for each possible CPU +and one gcwq to serve work items queued on unbound workqueues. + +Subsystems and drivers can create and queue work items through special +workqueue API functions as they see fit. They can influence some +aspects of the way the work items are executed by setting flags on the +workqueue they are putting the work item on. These flags include +things like CPU locality, reentrancy, concurrency limits and more. To +get a detailed overview refer to the API description of +alloc_workqueue() below. + +When a work item is queued to a workqueue, the target gcwq is +determined according to the queue parameters and workqueue attributes +and appended on the shared worklist of the gcwq. For example, unless +specifically overridden, a work item of a bound workqueue will be +queued on the worklist of exactly that gcwq that is associated to the +CPU the issuer is running on. + +For any worker pool implementation, managing the concurrency level +(how many execution contexts are active) is an important issue. cmwq +tries to keep the concurrency at a minimal but sufficient level. +Minimal to save resources and sufficient in that the system is used at +its full capacity. + +Each gcwq bound to an actual CPU implements concurrency management by +hooking into the scheduler. The gcwq is notified whenever an active +worker wakes up or sleeps and keeps track of the number of the +currently runnable workers. Generally, work items are not expected to +hog a CPU and consume many cycles. That means maintaining just enough +concurrency to prevent work processing from stalling should be +optimal. As long as there are one or more runnable workers on the +CPU, the gcwq doesn't start execution of a new work, but, when the +last running worker goes to sleep, it immediately schedules a new +worker so that the CPU doesn't sit idle while there are pending work +items. This allows using a minimal number of workers without losing +execution bandwidth. + +Keeping idle workers around doesn't cost other than the memory space +for kthreads, so cmwq holds onto idle ones for a while before killing +them. + +For an unbound wq, the above concurrency management doesn't apply and +the gcwq for the pseudo unbound CPU tries to start executing all work +items as soon as possible. The responsibility of regulating +concurrency level is on the users. There is also a flag to mark a +bound wq to ignore the concurrency management. Please refer to the +API section for details. + +Forward progress guarantee relies on that workers can be created when +more execution contexts are necessary, which in turn is guaranteed +through the use of rescue workers. 
All work items which might be used +on code paths that handle memory reclaim are required to be queued on +wq's that have a rescue-worker reserved for execution under memory +pressure. Else it is possible that the thread-pool deadlocks waiting +for execution contexts to free up. + + +4. Application Programming Interface (API) + +alloc_workqueue() allocates a wq. The original create_*workqueue() +functions are deprecated and scheduled for removal. alloc_workqueue() +takes three arguments - @name, @flags and @max_active. @name is the +name of the wq and also used as the name of the rescuer thread if +there is one. + +A wq no longer manages execution resources but serves as a domain for +forward progress guarantee, flush and work item attributes. @flags +and @max_active control how work items are assigned execution +resources, scheduled and executed. + +@flags: + + WQ_NON_REENTRANT + + By default, a wq guarantees non-reentrance only on the same + CPU. A work item may not be executed concurrently on the same + CPU by multiple workers but is allowed to be executed + concurrently on multiple CPUs. This flag makes sure + non-reentrance is enforced across all CPUs. Work items queued + to a non-reentrant wq are guaranteed to be executed by at most + one worker system-wide at any given time. + + WQ_UNBOUND + + Work items queued to an unbound wq are served by a special + gcwq which hosts workers which are not bound to any specific + CPU. This makes the wq behave as a simple execution context + provider without concurrency management. The unbound gcwq + tries to start execution of work items as soon as possible. + Unbound wq sacrifices locality but is useful for the following + cases. + + * Wide fluctuation in the concurrency level requirement is + expected and using bound wq may end up creating large number + of mostly unused workers across different CPUs as the issuer + hops through different CPUs. + + * Long running CPU intensive workloads which can be better + managed by the system scheduler. + + WQ_FREEZEABLE + + A freezeable wq participates in the freeze phase of the system + suspend operations. Work items on the wq are drained and no + new work item starts execution until thawed. + + WQ_RESCUER + + All wq which might be used in the memory reclaim paths _MUST_ + have this flag set. This reserves one worker exclusively for + the execution of this wq under memory pressure. + + WQ_HIGHPRI + + Work items of a highpri wq are queued at the head of the + worklist of the target gcwq and start execution regardless of + the current concurrency level. In other words, highpri work + items will always start execution as soon as execution + resource is available. + + Ordering among highpri work items is preserved - a highpri + work item queued after another highpri work item will start + execution after the earlier highpri work item starts. + + Although highpri work items are not held back by other + runnable work items, they still contribute to the concurrency + level. Highpri work items in runnable state will prevent + non-highpri work items from starting execution. + + This flag is meaningless for unbound wq. + + WQ_CPU_INTENSIVE + + Work items of a CPU intensive wq do not contribute to the + concurrency level. In other words, runnable CPU intensive + work items will not prevent other work items from starting + execution. This is useful for bound work items which are + expected to hog CPU cycles so that their execution is + regulated by the system scheduler. 
+ + Although CPU intensive work items don't contribute to the + concurrency level, start of their executions is still + regulated by the concurrency management and runnable + non-CPU-intensive work items can delay execution of CPU + intensive work items. + + This flag is meaningless for unbound wq. + + WQ_HIGHPRI | WQ_CPU_INTENSIVE + + This combination makes the wq avoid interaction with + concurrency management completely and behave as a simple + per-CPU execution context provider. Work items queued on a + highpri CPU-intensive wq start execution as soon as resources + are available and don't affect execution of other work items. + +@max_active: + +@max_active determines the maximum number of execution contexts per +CPU which can be assigned to the work items of a wq. For example, +with @max_active of 16, at most 16 work items of the wq can be +executing at the same time per CPU. + +Currently, for a bound wq, the maximum limit for @max_active is 512 +and the default value used when 0 is specified is 256. For an unbound +wq, the limit is higher of 512 and 4 * num_possible_cpus(). These +values are chosen sufficiently high such that they are not the +limiting factor while providing protection in runaway cases. + +The number of active work items of a wq is usually regulated by the +users of the wq, more specifically, by how many work items the users +may queue at the same time. Unless there is a specific need for +throttling the number of active work items, specifying '0' is +recommended. + +Some users depend on the strict execution ordering of ST wq. The +combination of @max_active of 1 and WQ_UNBOUND is used to achieve this +behavior. Work items on such wq are always queued to the unbound gcwq +and only one work item can be active at any given time thus achieving +the same ordering property as ST wq. + + +5. Example Execution Scenarios + +The following example execution scenarios try to illustrate how cmwq +behave under different configurations. + + Work items w0, w1, w2 are queued to a bound wq q0 on the same CPU. + w0 burns CPU for 5ms then sleeps for 10ms then burns CPU for 5ms + again before finishing. w1 and w2 burn CPU for 5ms then sleep for + 10ms. + +Ignoring all other tasks, works and processing overhead, and assuming +simple FIFO scheduling, the following is one highly simplified version +of possible sequences of events with the original wq. 
+ + TIME IN MSECS EVENT + 0 w0 starts and burns CPU + 5 w0 sleeps + 15 w0 wakes up and burns CPU + 20 w0 finishes + 20 w1 starts and burns CPU + 25 w1 sleeps + 35 w1 wakes up and finishes + 35 w2 starts and burns CPU + 40 w2 sleeps + 50 w2 wakes up and finishes + +And with cmwq with @max_active >= 3, + + TIME IN MSECS EVENT + 0 w0 starts and burns CPU + 5 w0 sleeps + 5 w1 starts and burns CPU + 10 w1 sleeps + 10 w2 starts and burns CPU + 15 w2 sleeps + 15 w0 wakes up and burns CPU + 20 w0 finishes + 20 w1 wakes up and finishes + 25 w2 wakes up and finishes + +If @max_active == 2, + + TIME IN MSECS EVENT + 0 w0 starts and burns CPU + 5 w0 sleeps + 5 w1 starts and burns CPU + 10 w1 sleeps + 15 w0 wakes up and burns CPU + 20 w0 finishes + 20 w1 wakes up and finishes + 20 w2 starts and burns CPU + 25 w2 sleeps + 35 w2 wakes up and finishes + +Now, let's assume w1 and w2 are queued to a different wq q1 which has +WQ_HIGHPRI set, + + TIME IN MSECS EVENT + 0 w1 and w2 start and burn CPU + 5 w1 sleeps + 10 w2 sleeps + 10 w0 starts and burns CPU + 15 w0 sleeps + 15 w1 wakes up and finishes + 20 w2 wakes up and finishes + 25 w0 wakes up and burns CPU + 30 w0 finishes + +If q1 has WQ_CPU_INTENSIVE set, + + TIME IN MSECS EVENT + 0 w0 starts and burns CPU + 5 w0 sleeps + 5 w1 and w2 start and burn CPU + 10 w1 sleeps + 15 w2 sleeps + 15 w0 wakes up and burns CPU + 20 w0 finishes + 20 w1 wakes up and finishes + 25 w2 wakes up and finishes + + +6. Guidelines + +* Do not forget to use WQ_RESCUER if a wq may process work items which + are used during memory reclaim. Each wq with WQ_RESCUER set has one + rescuer thread reserved for it. If there is dependency among + multiple work items used during memory reclaim, they should be + queued to separate wq each with WQ_RESCUER. + +* Unless strict ordering is required, there is no need to use ST wq. + +* Unless there is a specific need, using 0 for @max_active is + recommended. In most use cases, concurrency level usually stays + well under the default limit. + +* A wq serves as a domain for forward progress guarantee (WQ_RESCUER), + flush and work item attributes. Work items which are not involved + in memory reclaim and don't need to be flushed as a part of a group + of work items, and don't require any special attribute, can use one + of the system wq. There is no difference in execution + characteristics between using a dedicated wq and a system wq. + +* Unless work items are expected to consume a huge amount of CPU + cycles, using a bound wq is usually beneficial due to the increased + level of locality in wq operations and work item execution. diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index f11100f96482..25e02c941bac 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -235,6 +235,10 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } #define work_clear_pending(work) \ clear_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)) +/* + * Workqueue flags and constants. For details, please refer to + * Documentation/workqueue.txt. 
+ */ enum { WQ_NON_REENTRANT = 1 << 0, /* guarantee non-reentrance */ WQ_UNBOUND = 1 << 1, /* not bound to any cpu */ diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 727f24e563ae..f77afd939229 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1,19 +1,26 @@ /* - * linux/kernel/workqueue.c + * kernel/workqueue.c - generic async execution with shared worker pool * - * Generic mechanism for defining kernel helper threads for running - * arbitrary tasks in process context. + * Copyright (C) 2002 Ingo Molnar * - * Started by Ingo Molnar, Copyright (C) 2002 + * Derived from the taskqueue/keventd code by: + * David Woodhouse + * Andrew Morton + * Kai Petzke + * Theodore Ts'o * - * Derived from the taskqueue/keventd code by: + * Made to use alloc_percpu by Christoph Lameter. * - * David Woodhouse - * Andrew Morton - * Kai Petzke - * Theodore Ts'o + * Copyright (C) 2010 SUSE Linux Products GmbH + * Copyright (C) 2010 Tejun Heo * - * Made to use alloc_percpu by Christoph Lameter. + * This is the generic async execution mechanism. Work items as are + * executed in process context. The worker pool is shared and + * automatically managed. There is one worker pool for each CPU and + * one extra for works which are better served by workers which are + * not bound to any specific CPU. + * + * Please read Documentation/workqueue.txt for details. */ #include -- cgit v1.2.3 From 81dcaf6516d8bbd75b894862c8ae7bba04380cfe Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 16 Sep 2010 10:17:35 +0200 Subject: workqueue: implement alloc_ordered_workqueue() alloc_ordered_workqueue() creates a workqueue which processes each work itemp one by one in the queued order. This will be used to replace create_freezeable_workqueue() and create_singlethread_workqueue(). Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 25e02c941bac..07c48925a8fc 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -306,6 +306,24 @@ __alloc_workqueue_key(const char *name, unsigned int flags, int max_active, __alloc_workqueue_key((name), (flags), (max_active), NULL, NULL) #endif +/** + * alloc_ordered_workqueue - allocate an ordered workqueue + * @name: name of the workqueue + * @flags: WQ_* flags (only WQ_FREEZEABLE and WQ_RESCUER are meaningful) + * + * Allocate an ordered workqueue. An ordered workqueue executes at + * most one work item at any given time in the queued order. They are + * implemented as unbound workqueues with @max_active of one. + * + * RETURNS: + * Pointer to the allocated workqueue on success, %NULL on failure. + */ +static inline struct workqueue_struct * +alloc_ordered_workqueue(const char *name, unsigned int flags) +{ + return alloc_workqueue(name, WQ_UNBOUND | flags, 1); +} + #define create_workqueue(name) \ alloc_workqueue((name), WQ_RESCUER, 1) #define create_freezeable_workqueue(name) \ -- cgit v1.2.3 From 401a8d048eadfbe1b1c1bf53d3b614fcc894c61a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 16 Sep 2010 10:36:00 +0200 Subject: workqueue: cleanup flush/cancel functions Make the following cleanup changes. * Relocate flush/cancel function prototypes and definitions. * Relocate wait_on_cpu_work() and wait_on_work() before try_to_grab_pending(). These will be used to implement flush_work_sync(). * Make all flush/cancel functions return bool instead of int. 
* Update wait_on_cpu_work() and wait_on_work() to return %true if they actually waited. * Add / update comments. This patch doesn't cause any functional changes. Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 18 ++--- kernel/workqueue.c | 175 +++++++++++++++++++++++++--------------------- 2 files changed, 103 insertions(+), 90 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 07c48925a8fc..bb9b683ea6fa 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -343,7 +343,6 @@ extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, extern void flush_workqueue(struct workqueue_struct *wq); extern void flush_scheduled_work(void); -extern void flush_delayed_work(struct delayed_work *work); extern int schedule_work(struct work_struct *work); extern int schedule_work_on(int cpu, struct work_struct *work); @@ -355,8 +354,11 @@ extern int keventd_up(void); int execute_in_process_context(work_func_t fn, struct execute_work *); -extern int flush_work(struct work_struct *work); -extern int cancel_work_sync(struct work_struct *work); +extern bool flush_work(struct work_struct *work); +extern bool cancel_work_sync(struct work_struct *work); + +extern bool flush_delayed_work(struct delayed_work *dwork); +extern bool cancel_delayed_work_sync(struct delayed_work *dwork); extern void workqueue_set_max_active(struct workqueue_struct *wq, int max_active); @@ -370,9 +372,9 @@ extern unsigned int work_busy(struct work_struct *work); * it returns 1 and the work doesn't re-arm itself. Run flush_workqueue() or * cancel_work_sync() to wait on it. */ -static inline int cancel_delayed_work(struct delayed_work *work) +static inline bool cancel_delayed_work(struct delayed_work *work) { - int ret; + bool ret; ret = del_timer_sync(&work->timer); if (ret) @@ -385,9 +387,9 @@ static inline int cancel_delayed_work(struct delayed_work *work) * if it returns 0 the timer function may be running and the queueing is in * progress. */ -static inline int __cancel_delayed_work(struct delayed_work *work) +static inline bool __cancel_delayed_work(struct delayed_work *work) { - int ret; + bool ret; ret = del_timer(&work->timer); if (ret) @@ -395,8 +397,6 @@ static inline int __cancel_delayed_work(struct delayed_work *work) return ret; } -extern int cancel_delayed_work_sync(struct delayed_work *work); - /* Obsolete. use cancel_delayed_work_sync() */ static inline void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, diff --git a/kernel/workqueue.c b/kernel/workqueue.c index f77afd939229..1240b9d94b03 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2327,16 +2327,24 @@ out_unlock: EXPORT_SYMBOL_GPL(flush_workqueue); /** - * flush_work - block until a work_struct's callback has terminated - * @work: the work which is to be flushed + * flush_work - wait for a work to finish executing the last queueing instance + * @work: the work to flush * - * Returns false if @work has already terminated. + * Wait until @work has finished execution. This function considers + * only the last queueing instance of @work. If @work has been + * enqueued across different CPUs on a non-reentrant workqueue or on + * multiple workqueues, @work might still be executing on return on + * some of the CPUs from earlier queueing. * - * It is expected that, prior to calling flush_work(), the caller has - * arranged for the work to not be requeued, otherwise it doesn't make - * sense to use this function. 
+ * If @work was queued only on a non-reentrant, ordered or unbound + * workqueue, @work is guaranteed to be idle on return if it hasn't + * been requeued since flush started. + * + * RETURNS: + * %true if flush_work() waited for the work to finish execution, + * %false if it was already idle. */ -int flush_work(struct work_struct *work) +bool flush_work(struct work_struct *work) { struct worker *worker = NULL; struct global_cwq *gcwq; @@ -2374,13 +2382,49 @@ int flush_work(struct work_struct *work) wait_for_completion(&barr.done); destroy_work_on_stack(&barr.work); - return 1; + return true; already_gone: spin_unlock_irq(&gcwq->lock); - return 0; + return false; } EXPORT_SYMBOL_GPL(flush_work); +static bool wait_on_cpu_work(struct global_cwq *gcwq, struct work_struct *work) +{ + struct wq_barrier barr; + struct worker *worker; + + spin_lock_irq(&gcwq->lock); + + worker = find_worker_executing_work(gcwq, work); + if (unlikely(worker)) + insert_wq_barrier(worker->current_cwq, &barr, work, worker); + + spin_unlock_irq(&gcwq->lock); + + if (unlikely(worker)) { + wait_for_completion(&barr.done); + destroy_work_on_stack(&barr.work); + return true; + } else + return false; +} + +static bool wait_on_work(struct work_struct *work) +{ + bool ret = false; + int cpu; + + might_sleep(); + + lock_map_acquire(&work->lockdep_map); + lock_map_release(&work->lockdep_map); + + for_each_gcwq_cpu(cpu) + ret |= wait_on_cpu_work(get_gcwq(cpu), work); + return ret; +} + /* * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit, * so this work can't be re-armed in any way. @@ -2423,39 +2467,7 @@ static int try_to_grab_pending(struct work_struct *work) return ret; } -static void wait_on_cpu_work(struct global_cwq *gcwq, struct work_struct *work) -{ - struct wq_barrier barr; - struct worker *worker; - - spin_lock_irq(&gcwq->lock); - - worker = find_worker_executing_work(gcwq, work); - if (unlikely(worker)) - insert_wq_barrier(worker->current_cwq, &barr, work, worker); - - spin_unlock_irq(&gcwq->lock); - - if (unlikely(worker)) { - wait_for_completion(&barr.done); - destroy_work_on_stack(&barr.work); - } -} - -static void wait_on_work(struct work_struct *work) -{ - int cpu; - - might_sleep(); - - lock_map_acquire(&work->lockdep_map); - lock_map_release(&work->lockdep_map); - - for_each_gcwq_cpu(cpu) - wait_on_cpu_work(get_gcwq(cpu), work); -} - -static int __cancel_work_timer(struct work_struct *work, +static bool __cancel_work_timer(struct work_struct *work, struct timer_list* timer) { int ret; @@ -2472,42 +2484,60 @@ static int __cancel_work_timer(struct work_struct *work, } /** - * cancel_work_sync - block until a work_struct's callback has terminated - * @work: the work which is to be flushed - * - * Returns true if @work was pending. + * cancel_work_sync - cancel a work and wait for it to finish + * @work: the work to cancel * - * cancel_work_sync() will cancel the work if it is queued. If the work's - * callback appears to be running, cancel_work_sync() will block until it - * has completed. + * Cancel @work and wait for its execution to finish. This function + * can be used even if the work re-queues itself or migrates to + * another workqueue. On return from this function, @work is + * guaranteed to be not pending or executing on any CPU. * - * It is possible to use this function if the work re-queues itself. It can - * cancel the work even if it migrates to another workqueue, however in that - * case it only guarantees that work->func() has completed on the last queued - * workqueue. 
- * - * cancel_work_sync(&delayed_work->work) should be used only if ->timer is not - * pending, otherwise it goes into a busy-wait loop until the timer expires. + * cancel_work_sync(&delayed_work->work) must not be used for + * delayed_work's. Use cancel_delayed_work_sync() instead. * - * The caller must ensure that workqueue_struct on which this work was last + * The caller must ensure that the workqueue on which @work was last * queued can't be destroyed before this function returns. + * + * RETURNS: + * %true if @work was pending, %false otherwise. */ -int cancel_work_sync(struct work_struct *work) +bool cancel_work_sync(struct work_struct *work) { return __cancel_work_timer(work, NULL); } EXPORT_SYMBOL_GPL(cancel_work_sync); /** - * cancel_delayed_work_sync - reliably kill off a delayed work. - * @dwork: the delayed work struct + * flush_delayed_work - wait for a dwork to finish executing the last queueing + * @dwork: the delayed work to flush + * + * Delayed timer is cancelled and the pending work is queued for + * immediate execution. Like flush_work(), this function only + * considers the last queueing instance of @dwork. + * + * RETURNS: + * %true if flush_work() waited for the work to finish execution, + * %false if it was already idle. + */ +bool flush_delayed_work(struct delayed_work *dwork) +{ + if (del_timer_sync(&dwork->timer)) + __queue_work(raw_smp_processor_id(), + get_work_cwq(&dwork->work)->wq, &dwork->work); + return flush_work(&dwork->work); +} +EXPORT_SYMBOL(flush_delayed_work); + +/** + * cancel_delayed_work_sync - cancel a delayed work and wait for it to finish + * @dwork: the delayed work cancel * - * Returns true if @dwork was pending. + * This is cancel_work_sync() for delayed works. * - * It is possible to use this function if @dwork rearms itself via queue_work() - * or queue_delayed_work(). See also the comment for cancel_work_sync(). + * RETURNS: + * %true if @dwork was pending, %false otherwise. */ -int cancel_delayed_work_sync(struct delayed_work *dwork) +bool cancel_delayed_work_sync(struct delayed_work *dwork) { return __cancel_work_timer(&dwork->work, &dwork->timer); } @@ -2558,23 +2588,6 @@ int schedule_delayed_work(struct delayed_work *dwork, } EXPORT_SYMBOL(schedule_delayed_work); -/** - * flush_delayed_work - block until a dwork_struct's callback has terminated - * @dwork: the delayed work which is to be flushed - * - * Any timeout is cancelled, and any pending work is run immediately. - */ -void flush_delayed_work(struct delayed_work *dwork) -{ - if (del_timer_sync(&dwork->timer)) { - __queue_work(get_cpu(), get_work_cwq(&dwork->work)->wq, - &dwork->work); - put_cpu(); - } - flush_work(&dwork->work); -} -EXPORT_SYMBOL(flush_delayed_work); - /** * schedule_delayed_work_on - queue work in global workqueue on CPU after delay * @cpu: cpu to use -- cgit v1.2.3 From 09383498c5d35262e643bfdbae84826177a3c624 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 16 Sep 2010 10:48:29 +0200 Subject: workqueue: implement flush[_delayed]_work_sync() Implement flush[_delayed]_work_sync(). These are flush functions which also make sure no CPU is still executing the target work from earlier queueing instances. These are similar to cancel[_delayed]_work_sync() except that the target work item is flushed instead of cancelled. 
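A minimal sketch of how the new helper is meant to be used (the example_* names are hypothetical, not part of this patch):

	#include <linux/kernel.h>
	#include <linux/workqueue.h>

	static void example_fn(struct work_struct *work) { }
	static DECLARE_WORK(example_work, example_fn);

	static void example_quiesce(void)
	{
		schedule_work(&example_work);

		/*
		 * Unlike cancel_work_sync(), this lets the work item run;
		 * it only waits until all queueing instances issued so far
		 * have finished executing.
		 */
		if (flush_work_sync(&example_work))
			pr_info("waited for example_work to finish\n");
	}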
Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 2 ++ kernel/workqueue.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index bb9b683ea6fa..e33ff4a91703 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -355,9 +355,11 @@ extern int keventd_up(void); int execute_in_process_context(work_func_t fn, struct execute_work *); extern bool flush_work(struct work_struct *work); +extern bool flush_work_sync(struct work_struct *work); extern bool cancel_work_sync(struct work_struct *work); extern bool flush_delayed_work(struct delayed_work *dwork); +extern bool flush_delayed_work_sync(struct delayed_work *work); extern bool cancel_delayed_work_sync(struct delayed_work *dwork); extern void workqueue_set_max_active(struct workqueue_struct *wq, diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 33d31d768706..19e4bc15ee99 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2435,6 +2435,41 @@ static bool wait_on_work(struct work_struct *work) return ret; } +/** + * flush_work_sync - wait until a work has finished execution + * @work: the work to flush + * + * Wait until @work has finished execution. On return, it's + * guaranteed that all queueing instances of @work which happened + * before this function is called are finished. In other words, if + * @work hasn't been requeued since this function was called, @work is + * guaranteed to be idle on return. + * + * RETURNS: + * %true if flush_work_sync() waited for the work to finish execution, + * %false if it was already idle. + */ +bool flush_work_sync(struct work_struct *work) +{ + struct wq_barrier barr; + bool pending, waited; + + /* we'll wait for executions separately, queue barr only if pending */ + pending = start_flush_work(work, &barr, false); + + /* wait for executions to finish */ + waited = wait_on_work(work); + + /* wait for the pending one */ + if (pending) { + wait_for_completion(&barr.done); + destroy_work_on_stack(&barr.work); + } + + return pending || waited; +} +EXPORT_SYMBOL_GPL(flush_work_sync); + /* * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit, * so this work can't be re-armed in any way. @@ -2538,6 +2573,27 @@ bool flush_delayed_work(struct delayed_work *dwork) } EXPORT_SYMBOL(flush_delayed_work); +/** + * flush_delayed_work_sync - wait for a dwork to finish + * @dwork: the delayed work to flush + * + * Delayed timer is cancelled and the pending work is queued for + * execution immediately. Other than timer handling, its behavior + * is identical to flush_work_sync(). + * + * RETURNS: + * %true if flush_work_sync() waited for the work to finish execution, + * %false if it was already idle. + */ +bool flush_delayed_work_sync(struct delayed_work *dwork) +{ + if (del_timer_sync(&dwork->timer)) + __queue_work(raw_smp_processor_id(), + get_work_cwq(&dwork->work)->wq, &dwork->work); + return flush_work_sync(&dwork->work); +} +EXPORT_SYMBOL(flush_delayed_work_sync); + /** * cancel_delayed_work_sync - cancel a delayed work and wait for it to finish * @dwork: the delayed work cancel -- cgit v1.2.3 From 6370a6ad3b53df90b4700977f7718118a2cd524a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 11 Oct 2010 15:12:27 +0200 Subject: workqueue: add and use WQ_MEM_RECLAIM flag Add WQ_MEM_RECLAIM flag which currently maps to WQ_RESCUER, mark WQ_RESCUER as internal and replace all external WQ_RESCUER usages to WQ_MEM_RECLAIM. 
This makes the API users express the intent of the workqueue instead of indicating the internal mechanism used to guarantee forward progress. This is also to make it cleaner to add more semantics to WQ_MEM_RECLAIM. For example, if deemed necessary, memory reclaim workqueues can be made highpri. This patch doesn't introduce any functional change. Signed-off-by: Tejun Heo Cc: Jeff Garzik Cc: Dave Chinner Cc: Steven Whitehouse --- Documentation/workqueue.txt | 29 +++++++++++++++-------------- drivers/ata/libata-sff.c | 2 +- fs/gfs2/main.c | 2 +- fs/xfs/linux-2.6/xfs_buf.c | 2 +- include/linux/workqueue.h | 11 ++++++----- kernel/workqueue.c | 7 +++++++ 6 files changed, 31 insertions(+), 22 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/Documentation/workqueue.txt b/Documentation/workqueue.txt index e4498a2872c3..996a27d9b8db 100644 --- a/Documentation/workqueue.txt +++ b/Documentation/workqueue.txt @@ -196,11 +196,11 @@ resources, scheduled and executed. suspend operations. Work items on the wq are drained and no new work item starts execution until thawed. - WQ_RESCUER + WQ_MEM_RECLAIM All wq which might be used in the memory reclaim paths _MUST_ - have this flag set. This reserves one worker exclusively for - the execution of this wq under memory pressure. + have this flag set. The wq is guaranteed to have at least one + execution context regardless of memory pressure. WQ_HIGHPRI @@ -356,11 +356,11 @@ If q1 has WQ_CPU_INTENSIVE set, 6. Guidelines -* Do not forget to use WQ_RESCUER if a wq may process work items which - are used during memory reclaim. Each wq with WQ_RESCUER set has one - rescuer thread reserved for it. If there is dependency among - multiple work items used during memory reclaim, they should be - queued to separate wq each with WQ_RESCUER. +* Do not forget to use WQ_MEM_RECLAIM if a wq may process work items + which are used during memory reclaim. Each wq with WQ_MEM_RECLAIM + set has an execution context reserved for it. If there is + dependency among multiple work items used during memory reclaim, + they should be queued to separate wq each with WQ_MEM_RECLAIM. * Unless strict ordering is required, there is no need to use ST wq. @@ -368,12 +368,13 @@ If q1 has WQ_CPU_INTENSIVE set, recommended. In most use cases, concurrency level usually stays well under the default limit. -* A wq serves as a domain for forward progress guarantee (WQ_RESCUER), - flush and work item attributes. Work items which are not involved - in memory reclaim and don't need to be flushed as a part of a group - of work items, and don't require any special attribute, can use one - of the system wq. There is no difference in execution - characteristics between using a dedicated wq and a system wq. +* A wq serves as a domain for forward progress guarantee + (WQ_MEM_RECLAIM, flush and work item attributes. Work items which + are not involved in memory reclaim and don't need to be flushed as a + part of a group of work items, and don't require any special + attribute, can use one of the system wq. There is no difference in + execution characteristics between using a dedicated wq and a system + wq. 
* Unless work items are expected to consume a huge amount of CPU cycles, using a bound wq is usually beneficial due to the increased diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index e30c537cce32..f5296bb19ec0 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -3335,7 +3335,7 @@ void ata_sff_port_init(struct ata_port *ap) int __init ata_sff_init(void) { - ata_sff_wq = alloc_workqueue("ata_sff", WQ_RESCUER, WQ_MAX_ACTIVE); + ata_sff_wq = alloc_workqueue("ata_sff", WQ_MEM_RECLAIM, WQ_MAX_ACTIVE); if (!ata_sff_wq) return -ENOMEM; diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index b1e9630eb46a..1c5f46075d52 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -140,7 +140,7 @@ static int __init init_gfs2_fs(void) error = -ENOMEM; gfs_recovery_wq = alloc_workqueue("gfs_recovery", - WQ_NON_REENTRANT | WQ_RESCUER, 0); + WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0); if (!gfs_recovery_wq) goto fail_wq; diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 286e36e21dae..6838aefca71f 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1933,7 +1933,7 @@ xfs_buf_init(void) goto out; xfslogd_workqueue = alloc_workqueue("xfslogd", - WQ_RESCUER | WQ_HIGHPRI, 1); + WQ_MEM_RECLAIM | WQ_HIGHPRI, 1); if (!xfslogd_workqueue) goto out_free_buf_zone; diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index e33ff4a91703..03bbe903e5ce 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -243,11 +243,12 @@ enum { WQ_NON_REENTRANT = 1 << 0, /* guarantee non-reentrance */ WQ_UNBOUND = 1 << 1, /* not bound to any cpu */ WQ_FREEZEABLE = 1 << 2, /* freeze during suspend */ - WQ_RESCUER = 1 << 3, /* has an rescue worker */ + WQ_MEM_RECLAIM = 1 << 3, /* may be used for memory reclaim */ WQ_HIGHPRI = 1 << 4, /* high priority */ WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */ WQ_DYING = 1 << 6, /* internal: workqueue is dying */ + WQ_RESCUER = 1 << 7, /* internal: workqueue has rescuer */ WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ WQ_MAX_UNBOUND_PER_CPU = 4, /* 4 * #cpus for unbound wq */ @@ -309,7 +310,7 @@ __alloc_workqueue_key(const char *name, unsigned int flags, int max_active, /** * alloc_ordered_workqueue - allocate an ordered workqueue * @name: name of the workqueue - * @flags: WQ_* flags (only WQ_FREEZEABLE and WQ_RESCUER are meaningful) + * @flags: WQ_* flags (only WQ_FREEZEABLE and WQ_MEM_RECLAIM are meaningful) * * Allocate an ordered workqueue. An ordered workqueue executes at * most one work item at any given time in the queued order. 
They are @@ -325,11 +326,11 @@ alloc_ordered_workqueue(const char *name, unsigned int flags) } #define create_workqueue(name) \ - alloc_workqueue((name), WQ_RESCUER, 1) + alloc_workqueue((name), WQ_MEM_RECLAIM, 1) #define create_freezeable_workqueue(name) \ - alloc_workqueue((name), WQ_FREEZEABLE | WQ_UNBOUND | WQ_RESCUER, 1) + alloc_workqueue((name), WQ_FREEZEABLE | WQ_UNBOUND | WQ_MEM_RECLAIM, 1) #define create_singlethread_workqueue(name) \ - alloc_workqueue((name), WQ_UNBOUND | WQ_RESCUER, 1) + alloc_workqueue((name), WQ_UNBOUND | WQ_MEM_RECLAIM, 1) extern void destroy_workqueue(struct workqueue_struct *wq); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index b57a8babdec3..2c6871cbcbee 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2847,6 +2847,13 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name, struct workqueue_struct *wq; unsigned int cpu; + /* + * Workqueues which may be used during memory reclaim should + * have a rescuer to guarantee forward progress. + */ + if (flags & WQ_MEM_RECLAIM) + flags |= WQ_RESCUER; + /* * Unbound workqueues aren't concurrency managed and should be * dispatched to workers immediately. -- cgit v1.2.3 From daaae6b010ac0f60c9c35e481589966f9f1fcc22 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 19 Oct 2010 11:28:15 +0200 Subject: workqueue: remove in_workqueue_context() Commit a25909a4 (lockdep: Add an in_workqueue_context() lockdep-based test function) added in_workqueue_context() but there hasn't been any in-kernel user and the lockdep annotation in workqueue is scheduled to change. Remove the unused function. Signed-off-by: Tejun Heo Cc: Paul E. McKenney --- include/linux/workqueue.h | 4 ---- kernel/workqueue.c | 15 --------------- 2 files changed, 19 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 03bbe903e5ce..070bb7a88936 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -430,8 +430,4 @@ extern bool freeze_workqueues_busy(void); extern void thaw_workqueues(void); #endif /* CONFIG_FREEZER */ -#ifdef CONFIG_LOCKDEP -int in_workqueue_context(struct workqueue_struct *wq); -#endif - #endif diff --git a/kernel/workqueue.c b/kernel/workqueue.c index eb5c1972443a..30acdb74cc23 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -310,21 +310,6 @@ static inline int __next_wq_cpu(int cpu, const struct cpumask *mask, (cpu) < WORK_CPU_NONE; \ (cpu) = __next_wq_cpu((cpu), cpu_possible_mask, (wq))) -#ifdef CONFIG_LOCKDEP -/** - * in_workqueue_context() - in context of specified workqueue? - * @wq: the workqueue of interest - * - * Checks lockdep state to see if the current task is executing from - * within a workqueue item. This function exists only if lockdep is - * enabled. - */ -int in_workqueue_context(struct workqueue_struct *wq) -{ - return lock_is_held(&wq->lockdep_map); -} -#endif - #ifdef CONFIG_DEBUG_OBJECTS_WORK static struct debug_obj_descr work_debug_descr; -- cgit v1.2.3 From dd6414b50fa2b1cd247a8aa8f8bd42414b7453e1 Mon Sep 17 00:00:00 2001 From: Phil Carmody Date: Wed, 20 Oct 2010 15:57:33 -0700 Subject: timer: Permit statically-declared work with deferrable timers Currently, you have to just define a delayed_work uninitialised, and then initialise it before first use. That's a tad clumsy. At risk of playing mind-games with the compiler, fooling it into doing pointer arithmetic with compile-time-constants, this lets clients properly initialise delayed work with deferrable timers statically. 
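(Sketch only, under the assumption of a driver that periodically re-arms a housekeeping job; the names housekeeping_fn and housekeeping_work are hypothetical, while DECLARE_DEFERRED_WORK is the macro this patch adds.)

	/* illustrative use of the new static initializer; names are hypothetical */
	static void housekeeping_fn(struct work_struct *work);
	static DECLARE_DEFERRED_WORK(housekeeping_work, housekeeping_fn);

	/* the deferrable timer will not wake an idle CPU just to run this */
	schedule_delayed_work(&housekeeping_work, round_jiffies_relative(10 * HZ));
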
This patch was inspired by the issues which lead Artem Bityutskiy to commit 8eab945c5616fc984 ("sunrpc: make the cache cleaner workqueue deferrable"). Signed-off-by: Phil Carmody Acked-by: Artem Bityutskiy Cc: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner --- include/linux/timer.h | 25 +++++++++++++++++++++++++ include/linux/workqueue.h | 8 ++++++++ kernel/timer.c | 15 +-------------- 3 files changed, 34 insertions(+), 14 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/timer.h b/include/linux/timer.h index 1794674c1a52..cbfb7a355d30 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -48,6 +48,18 @@ extern struct tvec_base boot_tvec_bases; #define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn) #endif +/* + * Note that all tvec_bases are 2 byte aligned and lower bit of + * base in timer_list is guaranteed to be zero. Use the LSB to + * indicate whether the timer is deferrable. + * + * A deferrable timer will work normally when the system is busy, but + * will not cause a CPU to come out of idle just to service it; instead, + * the timer will be serviced when the CPU eventually wakes up with a + * subsequent non-deferrable timer. + */ +#define TBASE_DEFERRABLE_FLAG (0x1) + #define TIMER_INITIALIZER(_function, _expires, _data) { \ .entry = { .prev = TIMER_ENTRY_STATIC }, \ .function = (_function), \ @@ -59,6 +71,19 @@ extern struct tvec_base boot_tvec_bases; __FILE__ ":" __stringify(__LINE__)) \ } +#define TBASE_MAKE_DEFERRED(ptr) ((struct tvec_base *) \ + ((unsigned char *)(ptr) + TBASE_DEFERRABLE_FLAG)) + +#define TIMER_DEFERRED_INITIALIZER(_function, _expires, _data) {\ + .entry = { .prev = TIMER_ENTRY_STATIC }, \ + .function = (_function), \ + .expires = (_expires), \ + .data = (_data), \ + .base = TBASE_MAKE_DEFERRED(&boot_tvec_bases), \ + __TIMER_LOCKDEP_MAP_INITIALIZER( \ + __FILE__ ":" __stringify(__LINE__)) \ + } + #define DEFINE_TIMER(_name, _function, _expires, _data) \ struct timer_list _name = \ TIMER_INITIALIZER(_function, _expires, _data) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index f11100f96482..88238c15ec3e 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -127,12 +127,20 @@ struct execute_work { .timer = TIMER_INITIALIZER(NULL, 0, 0), \ } +#define __DEFERRED_WORK_INITIALIZER(n, f) { \ + .work = __WORK_INITIALIZER((n).work, (f)), \ + .timer = TIMER_DEFERRED_INITIALIZER(NULL, 0, 0), \ + } + #define DECLARE_WORK(n, f) \ struct work_struct n = __WORK_INITIALIZER(n, f) #define DECLARE_DELAYED_WORK(n, f) \ struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f) +#define DECLARE_DEFERRED_WORK(n, f) \ + struct delayed_work n = __DEFERRED_WORK_INITIALIZER(n, f) + /* * initialize a work item's function pointer */ diff --git a/kernel/timer.c b/kernel/timer.c index 97bf05baade7..72853b256ff2 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -88,18 +88,6 @@ struct tvec_base boot_tvec_bases; EXPORT_SYMBOL(boot_tvec_bases); static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases; -/* - * Note that all tvec_bases are 2 byte aligned and lower bit of - * base in timer_list is guaranteed to be zero. Use the LSB to - * indicate whether the timer is deferrable. - * - * A deferrable timer will work normally when the system is busy, but - * will not cause a CPU to come out of idle just to service it; instead, - * the timer will be serviced when the CPU eventually wakes up with a - * subsequent non-deferrable timer. 
- */ -#define TBASE_DEFERRABLE_FLAG (0x1) - /* Functions below help us manage 'deferrable' flag */ static inline unsigned int tbase_get_deferrable(struct tvec_base *base) { @@ -113,8 +101,7 @@ static inline struct tvec_base *tbase_get_base(struct tvec_base *base) static inline void timer_set_deferrable(struct timer_list *timer) { - timer->base = ((struct tvec_base *)((unsigned long)(timer->base) | - TBASE_DEFERRABLE_FLAG)); + timer->base = TBASE_MAKE_DEFERRED(timer->base); } static inline void -- cgit v1.2.3 From ca1cab37d91cbe8a8333732540d43cabb54cfa85 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 26 Oct 2010 14:22:34 -0700 Subject: workqueues: s/ON_STACK/ONSTACK/ Silly though it is, completions and wait_queue_heads use foo_ONSTACK (COMPLETION_INITIALIZER_ONSTACK, DECLARE_COMPLETION_ONSTACK, __WAIT_QUEUE_HEAD_INIT_ONSTACK and DECLARE_WAIT_QUEUE_HEAD_ONSTACK) so I guess workqueues should do the same thing. s/INIT_WORK_ON_STACK/INIT_WORK_ONSTACK/ s/INIT_DELAYED_WORK_ON_STACK/INIT_DELAYED_WORK_ONSTACK/ Cc: Peter Zijlstra Acked-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/hpet.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- drivers/md/dm-snap-persistent.c | 2 +- include/linux/workqueue.h | 6 +++--- kernel/workqueue.c | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index aff0b3c27509..ae03cab4352e 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -713,7 +713,7 @@ static int hpet_cpuhp_notify(struct notifier_block *n, switch (action & 0xf) { case CPU_ONLINE: - INIT_DELAYED_WORK_ON_STACK(&work.work, hpet_work); + INIT_DELAYED_WORK_ONSTACK(&work.work, hpet_work); init_completion(&work.complete); /* FIXME: add schedule_work_on() */ schedule_delayed_work_on(cpu, &work.work, 0); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 6af118511b4a..6c7faecd9e4a 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -747,7 +747,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), }; - INIT_WORK_ON_STACK(&c_idle.work, do_fork_idle); + INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle); alternatives_smp_switch(1); diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 0b61792a2780..2129cdb115dc 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -254,7 +254,7 @@ static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int rw, * Issue the synchronous I/O from a different thread * to avoid generic_make_request recursion. 
*/ - INIT_WORK_ON_STACK(&req.work, do_metadata); + INIT_WORK_ONSTACK(&req.work, do_metadata); queue_work(ps->metadata_wq, &req.work); flush_workqueue(ps->metadata_wq); diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 070bb7a88936..0c0771f06bfa 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -190,7 +190,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } __INIT_WORK((_work), (_func), 0); \ } while (0) -#define INIT_WORK_ON_STACK(_work, _func) \ +#define INIT_WORK_ONSTACK(_work, _func) \ do { \ __INIT_WORK((_work), (_func), 1); \ } while (0) @@ -201,9 +201,9 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } init_timer(&(_work)->timer); \ } while (0) -#define INIT_DELAYED_WORK_ON_STACK(_work, _func) \ +#define INIT_DELAYED_WORK_ONSTACK(_work, _func) \ do { \ - INIT_WORK_ON_STACK(&(_work)->work, (_func)); \ + INIT_WORK_ONSTACK(&(_work)->work, (_func)); \ init_timer_on_stack(&(_work)->timer); \ } while (0) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index e5ff2cbaadc2..90db1bd1a978 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2064,7 +2064,7 @@ static void insert_wq_barrier(struct cpu_workqueue_struct *cwq, * checks and call back into the fixup functions where we * might deadlock. */ - INIT_WORK_ON_STACK(&barr->work, wq_barrier_func); + INIT_WORK_ONSTACK(&barr->work, wq_barrier_func); __set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work)); init_completion(&barr->done); -- cgit v1.2.3 From ed41390fa57a21d06e6e3a3c4bc238bab8957fbb Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 14 Dec 2010 16:23:10 +0100 Subject: workqueue: deprecate cancel_rearming_delayed_work[queue]() There's no in-kernel user left for these two obsolete functions. Mark them deprecated and schedule for removal during 2.6.39 cycle. Signed-off-by: Tejun Heo Acked-by: David S. Miller --- Documentation/feature-removal-schedule.txt | 10 ++++++++++ include/linux/workqueue.h | 4 ++-- 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 6c2f55e05f13..4ff47deb86da 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -564,3 +564,13 @@ Why: This field is deprecated. I2C device drivers shouldn't change their Who: Jean Delvare ---------------------------- + +What: cancel_rearming_delayed_work[queue]() +When: 2.6.39 + +Why: The functions have been superceded by cancel_delayed_work_sync() + quite some time ago. The conversion is trivial and there is no + in-kernel user left. +Who: Tejun Heo + +---------------------------- diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 0c0771f06bfa..6b5193d70268 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -401,7 +401,7 @@ static inline bool __cancel_delayed_work(struct delayed_work *work) } /* Obsolete. use cancel_delayed_work_sync() */ -static inline +static inline __deprecated void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, struct delayed_work *work) { @@ -409,7 +409,7 @@ void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, } /* Obsolete. 
use cancel_delayed_work_sync() */ -static inline +static inline __deprecated void cancel_rearming_delayed_work(struct delayed_work *work) { cancel_delayed_work_sync(work); -- cgit v1.2.3 From 4149efb22da66e326fc48baf80d628834509f7f0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 8 Feb 2011 10:39:03 +0100 Subject: workqueue: add system_freezeable_wq Add system wide freezeable workqueue. Signed-off-by: Tejun Heo Acked-by: Dmitry Torokhov Cc: "Rafael J. Wysocki" --- include/linux/workqueue.h | 4 ++++ kernel/workqueue.c | 6 +++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 1ac11586a2f5..de6a755befac 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -286,11 +286,15 @@ enum { * any specific CPU, not concurrency managed, and all queued works are * executed immediately as long as max_active limit is not reached and * resources are available. + * + * system_freezeable_wq is equivalent to system_wq except that it's + * freezeable. */ extern struct workqueue_struct *system_wq; extern struct workqueue_struct *system_long_wq; extern struct workqueue_struct *system_nrt_wq; extern struct workqueue_struct *system_unbound_wq; +extern struct workqueue_struct *system_freezeable_wq; extern struct workqueue_struct * __alloc_workqueue_key(const char *name, unsigned int flags, int max_active, diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 11869faa6819..28f8bd08f0e7 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -249,10 +249,12 @@ struct workqueue_struct *system_wq __read_mostly; struct workqueue_struct *system_long_wq __read_mostly; struct workqueue_struct *system_nrt_wq __read_mostly; struct workqueue_struct *system_unbound_wq __read_mostly; +struct workqueue_struct *system_freezeable_wq __read_mostly; EXPORT_SYMBOL_GPL(system_wq); EXPORT_SYMBOL_GPL(system_long_wq); EXPORT_SYMBOL_GPL(system_nrt_wq); EXPORT_SYMBOL_GPL(system_unbound_wq); +EXPORT_SYMBOL_GPL(system_freezeable_wq); #define CREATE_TRACE_POINTS #include @@ -3764,8 +3766,10 @@ static int __init init_workqueues(void) system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0); system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, WQ_UNBOUND_MAX_ACTIVE); + system_freezeable_wq = alloc_workqueue("events_freezeable", + WQ_FREEZEABLE, 0); BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq || - !system_unbound_wq); + !system_unbound_wq || !system_freezeable_wq); return 0; } early_initcall(init_workqueues); -- cgit v1.2.3 From 58a69cb47ec6991bf006a3e5d202e8571b0327a4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Feb 2011 09:25:31 +0100 Subject: workqueue, freezer: unify spelling of 'freeze' + 'able' to 'freezable' There are two spellings in use for 'freeze' + 'able' - 'freezable' and 'freezeable'. The former is the more prominent one. The latter is mostly used by workqueue and in a few other odd places. Unify the spelling to 'freezable'. Signed-off-by: Tejun Heo Reported-by: Alan Stern Acked-by: "Rafael J. Wysocki" Acked-by: Greg Kroah-Hartman Acked-by: Dmitry Torokhov Cc: David Woodhouse Cc: Alex Dubov Cc: "David S. 
Miller" Cc: Steven Whitehouse --- Documentation/workqueue.txt | 4 ++-- drivers/memstick/core/memstick.c | 2 +- drivers/misc/tifm_core.c | 2 +- drivers/misc/vmw_balloon.c | 2 +- drivers/mtd/nand/r852.c | 2 +- drivers/mtd/sm_ftl.c | 2 +- drivers/net/can/mcp251x.c | 2 +- drivers/tty/serial/max3100.c | 2 +- drivers/tty/serial/max3107.c | 2 +- fs/gfs2/glock.c | 4 ++-- fs/gfs2/main.c | 2 +- include/linux/freezer.h | 2 +- include/linux/sched.h | 2 +- include/linux/workqueue.h | 8 ++++---- kernel/power/main.c | 2 +- kernel/power/process.c | 6 +++--- kernel/workqueue.c | 24 ++++++++++++------------ 17 files changed, 35 insertions(+), 35 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/Documentation/workqueue.txt b/Documentation/workqueue.txt index 996a27d9b8db..01c513fac40e 100644 --- a/Documentation/workqueue.txt +++ b/Documentation/workqueue.txt @@ -190,9 +190,9 @@ resources, scheduled and executed. * Long running CPU intensive workloads which can be better managed by the system scheduler. - WQ_FREEZEABLE + WQ_FREEZABLE - A freezeable wq participates in the freeze phase of the system + A freezable wq participates in the freeze phase of the system suspend operations. Work items on the wq are drained and no new work item starts execution until thawed. diff --git a/drivers/memstick/core/memstick.c b/drivers/memstick/core/memstick.c index e9a3eab7b0cf..8c1d85e27be4 100644 --- a/drivers/memstick/core/memstick.c +++ b/drivers/memstick/core/memstick.c @@ -621,7 +621,7 @@ static int __init memstick_init(void) { int rc; - workqueue = create_freezeable_workqueue("kmemstick"); + workqueue = create_freezable_workqueue("kmemstick"); if (!workqueue) return -ENOMEM; diff --git a/drivers/misc/tifm_core.c b/drivers/misc/tifm_core.c index 5f6852dff40b..44d4475a09dd 100644 --- a/drivers/misc/tifm_core.c +++ b/drivers/misc/tifm_core.c @@ -329,7 +329,7 @@ static int __init tifm_init(void) { int rc; - workqueue = create_freezeable_workqueue("tifm"); + workqueue = create_freezable_workqueue("tifm"); if (!workqueue) return -ENOMEM; diff --git a/drivers/misc/vmw_balloon.c b/drivers/misc/vmw_balloon.c index 4d2ea8e80140..6df5a55da110 100644 --- a/drivers/misc/vmw_balloon.c +++ b/drivers/misc/vmw_balloon.c @@ -785,7 +785,7 @@ static int __init vmballoon_init(void) if (x86_hyper != &x86_hyper_vmware) return -ENODEV; - vmballoon_wq = create_freezeable_workqueue("vmmemctl"); + vmballoon_wq = create_freezable_workqueue("vmmemctl"); if (!vmballoon_wq) { pr_err("failed to create workqueue\n"); return -ENOMEM; diff --git a/drivers/mtd/nand/r852.c b/drivers/mtd/nand/r852.c index d9d7efbc77cc..6322d1fb5d62 100644 --- a/drivers/mtd/nand/r852.c +++ b/drivers/mtd/nand/r852.c @@ -930,7 +930,7 @@ int r852_probe(struct pci_dev *pci_dev, const struct pci_device_id *id) init_completion(&dev->dma_done); - dev->card_workqueue = create_freezeable_workqueue(DRV_NAME); + dev->card_workqueue = create_freezable_workqueue(DRV_NAME); if (!dev->card_workqueue) goto error9; diff --git a/drivers/mtd/sm_ftl.c b/drivers/mtd/sm_ftl.c index 67822cf6c025..ac0d6a8613b5 100644 --- a/drivers/mtd/sm_ftl.c +++ b/drivers/mtd/sm_ftl.c @@ -1258,7 +1258,7 @@ static struct mtd_blktrans_ops sm_ftl_ops = { static __init int sm_module_init(void) { int error = 0; - cache_flush_workqueue = create_freezeable_workqueue("smflush"); + cache_flush_workqueue = create_freezable_workqueue("smflush"); if (IS_ERR(cache_flush_workqueue)) return PTR_ERR(cache_flush_workqueue); diff --git a/drivers/net/can/mcp251x.c b/drivers/net/can/mcp251x.c index 
7ab534aee452..7513c4523ac4 100644 --- a/drivers/net/can/mcp251x.c +++ b/drivers/net/can/mcp251x.c @@ -940,7 +940,7 @@ static int mcp251x_open(struct net_device *net) goto open_unlock; } - priv->wq = create_freezeable_workqueue("mcp251x_wq"); + priv->wq = create_freezable_workqueue("mcp251x_wq"); INIT_WORK(&priv->tx_work, mcp251x_tx_work_handler); INIT_WORK(&priv->restart_work, mcp251x_restart_work_handler); diff --git a/drivers/tty/serial/max3100.c b/drivers/tty/serial/max3100.c index beb1afa27d8d..7b951adac54b 100644 --- a/drivers/tty/serial/max3100.c +++ b/drivers/tty/serial/max3100.c @@ -601,7 +601,7 @@ static int max3100_startup(struct uart_port *port) s->rts = 0; sprintf(b, "max3100-%d", s->minor); - s->workqueue = create_freezeable_workqueue(b); + s->workqueue = create_freezable_workqueue(b); if (!s->workqueue) { dev_warn(&s->spi->dev, "cannot create workqueue\n"); return -EBUSY; diff --git a/drivers/tty/serial/max3107.c b/drivers/tty/serial/max3107.c index 910870edf708..750b4f627315 100644 --- a/drivers/tty/serial/max3107.c +++ b/drivers/tty/serial/max3107.c @@ -833,7 +833,7 @@ static int max3107_startup(struct uart_port *port) struct max3107_port *s = container_of(port, struct max3107_port, port); /* Initialize work queue */ - s->workqueue = create_freezeable_workqueue("max3107"); + s->workqueue = create_freezable_workqueue("max3107"); if (!s->workqueue) { dev_err(&s->spi->dev, "Workqueue creation failed\n"); return -EBUSY; diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 08a8beb152e6..7cd9a5a68d59 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1779,11 +1779,11 @@ int __init gfs2_glock_init(void) #endif glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM | - WQ_HIGHPRI | WQ_FREEZEABLE, 0); + WQ_HIGHPRI | WQ_FREEZABLE, 0); if (IS_ERR(glock_workqueue)) return PTR_ERR(glock_workqueue); gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", - WQ_MEM_RECLAIM | WQ_FREEZEABLE, + WQ_MEM_RECLAIM | WQ_FREEZABLE, 0); if (IS_ERR(gfs2_delete_workqueue)) { destroy_workqueue(glock_workqueue); diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index ebef7ab6e17e..85ba027d1c4d 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -144,7 +144,7 @@ static int __init init_gfs2_fs(void) error = -ENOMEM; gfs_recovery_wq = alloc_workqueue("gfs_recovery", - WQ_MEM_RECLAIM | WQ_FREEZEABLE, 0); + WQ_MEM_RECLAIM | WQ_FREEZABLE, 0); if (!gfs_recovery_wq) goto fail_wq; diff --git a/include/linux/freezer.h b/include/linux/freezer.h index da7e52b099f3..1effc8b56b4e 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -109,7 +109,7 @@ static inline void freezer_count(void) } /* - * Check if the task should be counted as freezeable by the freezer + * Check if the task should be counted as freezable by the freezer */ static inline int freezer_should_skip(struct task_struct *p) { diff --git a/include/linux/sched.h b/include/linux/sched.h index d747f948b34e..777d8a5ed06b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1744,7 +1744,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ -#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */ +#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ #define PF_FREEZER_NOSIG 0x80000000 /* Freezer won't send signals 
to it */ /* diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 1ac11586a2f5..f7998a3bf020 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -250,7 +250,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } enum { WQ_NON_REENTRANT = 1 << 0, /* guarantee non-reentrance */ WQ_UNBOUND = 1 << 1, /* not bound to any cpu */ - WQ_FREEZEABLE = 1 << 2, /* freeze during suspend */ + WQ_FREEZABLE = 1 << 2, /* freeze during suspend */ WQ_MEM_RECLAIM = 1 << 3, /* may be used for memory reclaim */ WQ_HIGHPRI = 1 << 4, /* high priority */ WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */ @@ -318,7 +318,7 @@ __alloc_workqueue_key(const char *name, unsigned int flags, int max_active, /** * alloc_ordered_workqueue - allocate an ordered workqueue * @name: name of the workqueue - * @flags: WQ_* flags (only WQ_FREEZEABLE and WQ_MEM_RECLAIM are meaningful) + * @flags: WQ_* flags (only WQ_FREEZABLE and WQ_MEM_RECLAIM are meaningful) * * Allocate an ordered workqueue. An ordered workqueue executes at * most one work item at any given time in the queued order. They are @@ -335,8 +335,8 @@ alloc_ordered_workqueue(const char *name, unsigned int flags) #define create_workqueue(name) \ alloc_workqueue((name), WQ_MEM_RECLAIM, 1) -#define create_freezeable_workqueue(name) \ - alloc_workqueue((name), WQ_FREEZEABLE | WQ_UNBOUND | WQ_MEM_RECLAIM, 1) +#define create_freezable_workqueue(name) \ + alloc_workqueue((name), WQ_FREEZABLE | WQ_UNBOUND | WQ_MEM_RECLAIM, 1) #define create_singlethread_workqueue(name) \ alloc_workqueue((name), WQ_UNBOUND | WQ_MEM_RECLAIM, 1) diff --git a/kernel/power/main.c b/kernel/power/main.c index 7b5db6a8561e..701853042c28 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -326,7 +326,7 @@ EXPORT_SYMBOL_GPL(pm_wq); static int __init pm_start_workqueue(void) { - pm_wq = alloc_workqueue("pm", WQ_FREEZEABLE, 0); + pm_wq = alloc_workqueue("pm", WQ_FREEZABLE, 0); return pm_wq ? 
0 : -ENOMEM; } diff --git a/kernel/power/process.c b/kernel/power/process.c index d6d2a10320e0..0cf3a27a6c9d 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -22,7 +22,7 @@ */ #define TIMEOUT (20 * HZ) -static inline int freezeable(struct task_struct * p) +static inline int freezable(struct task_struct * p) { if ((p == current) || (p->flags & PF_NOFREEZE) || @@ -53,7 +53,7 @@ static int try_to_freeze_tasks(bool sig_only) todo = 0; read_lock(&tasklist_lock); do_each_thread(g, p) { - if (frozen(p) || !freezeable(p)) + if (frozen(p) || !freezable(p)) continue; if (!freeze_task(p, sig_only)) @@ -167,7 +167,7 @@ static void thaw_tasks(bool nosig_only) read_lock(&tasklist_lock); do_each_thread(g, p) { - if (!freezeable(p)) + if (!freezable(p)) continue; if (nosig_only && should_send_signal(p)) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 90a17ca2ad0b..88a3e34f51f6 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2965,7 +2965,7 @@ struct workqueue_struct *__alloc_workqueue_key(const char *name, */ spin_lock(&workqueue_lock); - if (workqueue_freezing && wq->flags & WQ_FREEZEABLE) + if (workqueue_freezing && wq->flags & WQ_FREEZABLE) for_each_cwq_cpu(cpu, wq) get_cwq(cpu, wq)->max_active = 0; @@ -3077,7 +3077,7 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active) spin_lock_irq(&gcwq->lock); - if (!(wq->flags & WQ_FREEZEABLE) || + if (!(wq->flags & WQ_FREEZABLE) || !(gcwq->flags & GCWQ_FREEZING)) get_cwq(gcwq->cpu, wq)->max_active = max_active; @@ -3327,7 +3327,7 @@ static int __cpuinit trustee_thread(void *__gcwq) * want to get it over with ASAP - spam rescuers, wake up as * many idlers as necessary and create new ones till the * worklist is empty. Note that if the gcwq is frozen, there - * may be frozen works in freezeable cwqs. Don't declare + * may be frozen works in freezable cwqs. Don't declare * completion while frozen. */ while (gcwq->nr_workers != gcwq->nr_idle || @@ -3585,9 +3585,9 @@ EXPORT_SYMBOL_GPL(work_on_cpu); /** * freeze_workqueues_begin - begin freezing workqueues * - * Start freezing workqueues. After this function returns, all - * freezeable workqueues will queue new works to their frozen_works - * list instead of gcwq->worklist. + * Start freezing workqueues. After this function returns, all freezable + * workqueues will queue new works to their frozen_works list instead of + * gcwq->worklist. * * CONTEXT: * Grabs and releases workqueue_lock and gcwq->lock's. @@ -3613,7 +3613,7 @@ void freeze_workqueues_begin(void) list_for_each_entry(wq, &workqueues, list) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); - if (cwq && wq->flags & WQ_FREEZEABLE) + if (cwq && wq->flags & WQ_FREEZABLE) cwq->max_active = 0; } @@ -3624,7 +3624,7 @@ void freeze_workqueues_begin(void) } /** - * freeze_workqueues_busy - are freezeable workqueues still busy? + * freeze_workqueues_busy - are freezable workqueues still busy? * * Check whether freezing is complete. This function must be called * between freeze_workqueues_begin() and thaw_workqueues(). @@ -3633,8 +3633,8 @@ void freeze_workqueues_begin(void) * Grabs and releases workqueue_lock. * * RETURNS: - * %true if some freezeable workqueues are still busy. %false if - * freezing is complete. + * %true if some freezable workqueues are still busy. %false if freezing + * is complete. 
*/ bool freeze_workqueues_busy(void) { @@ -3654,7 +3654,7 @@ bool freeze_workqueues_busy(void) list_for_each_entry(wq, &workqueues, list) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); - if (!cwq || !(wq->flags & WQ_FREEZEABLE)) + if (!cwq || !(wq->flags & WQ_FREEZABLE)) continue; BUG_ON(cwq->nr_active < 0); @@ -3699,7 +3699,7 @@ void thaw_workqueues(void) list_for_each_entry(wq, &workqueues, list) { struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); - if (!cwq || !(wq->flags & WQ_FREEZEABLE)) + if (!cwq || !(wq->flags & WQ_FREEZABLE)) continue; /* restore max_active and repopulate worklist */ -- cgit v1.2.3 From 24d51add7438f9696a7205927bf9de3c5c787a58 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 21 Feb 2011 09:52:50 +0100 Subject: workqueue: fix build failure introduced by s/freezeable/freezable/ wq:fixes-2.6.38 does s/WQ_FREEZEABLE/WQ_FREEZABLE and wq:for-2.6.39 adds new usage of the flag. The combination of the two creates a build failure after merge. Fix it by renaming all freezeables to freezables. Signed-off-by: Tejun Heo Reported-by: Stephen Rothwell --- include/linux/workqueue.h | 6 +++--- kernel/workqueue.c | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index d110cc4f9fed..f584aba78ca9 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -287,14 +287,14 @@ enum { * executed immediately as long as max_active limit is not reached and * resources are available. * - * system_freezeable_wq is equivalent to system_wq except that it's - * freezeable. + * system_freezable_wq is equivalent to system_wq except that it's + * freezable. */ extern struct workqueue_struct *system_wq; extern struct workqueue_struct *system_long_wq; extern struct workqueue_struct *system_nrt_wq; extern struct workqueue_struct *system_unbound_wq; -extern struct workqueue_struct *system_freezeable_wq; +extern struct workqueue_struct *system_freezable_wq; extern struct workqueue_struct * __alloc_workqueue_key(const char *name, unsigned int flags, int max_active, diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 572f559f6cb9..1b64d225f067 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -251,12 +251,12 @@ struct workqueue_struct *system_wq __read_mostly; struct workqueue_struct *system_long_wq __read_mostly; struct workqueue_struct *system_nrt_wq __read_mostly; struct workqueue_struct *system_unbound_wq __read_mostly; -struct workqueue_struct *system_freezeable_wq __read_mostly; +struct workqueue_struct *system_freezable_wq __read_mostly; EXPORT_SYMBOL_GPL(system_wq); EXPORT_SYMBOL_GPL(system_long_wq); EXPORT_SYMBOL_GPL(system_nrt_wq); EXPORT_SYMBOL_GPL(system_unbound_wq); -EXPORT_SYMBOL_GPL(system_freezeable_wq); +EXPORT_SYMBOL_GPL(system_freezable_wq); #define CREATE_TRACE_POINTS #include @@ -3777,10 +3777,10 @@ static int __init init_workqueues(void) system_nrt_wq = alloc_workqueue("events_nrt", WQ_NON_REENTRANT, 0); system_unbound_wq = alloc_workqueue("events_unbound", WQ_UNBOUND, WQ_UNBOUND_MAX_ACTIVE); - system_freezeable_wq = alloc_workqueue("events_freezeable", - WQ_FREEZEABLE, 0); + system_freezable_wq = alloc_workqueue("events_freezable", + WQ_FREEZABLE, 0); BUG_ON(!system_wq || !system_long_wq || !system_nrt_wq || - !system_unbound_wq || !system_freezeable_wq); + !system_unbound_wq || !system_freezable_wq); return 0; } early_initcall(init_workqueues); -- cgit v1.2.3 From 2543a87108d2af7d48a43b3d6685c2b1ea279e36 Mon Sep 17 
00:00:00 2001 From: Amerigo Wang Date: Wed, 6 Apr 2011 10:43:11 +0200 Subject: workqueue: remove cancel_rearming_delayed_work[queue]() cancel_rearming_delayed_work() and cancel_rearming_delayed_workqueue() can be removed now. Signed-off-by: WANG Cong Signed-off-by: Tejun Heo --- Documentation/feature-removal-schedule.txt | 10 ---------- include/linux/workqueue.h | 15 --------------- 2 files changed, 25 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index f6a24e8aa11e..e472054382c3 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -513,16 +513,6 @@ Who: NeilBrown ---------------------------- -What: cancel_rearming_delayed_work[queue]() -When: 2.6.39 - -Why: The functions have been superceded by cancel_delayed_work_sync() - quite some time ago. The conversion is trivial and there is no - in-kernel user left. -Who: Tejun Heo - ----------------------------- - What: Legacy, non-standard chassis intrusion detection interface. When: June 2011 Why: The adm9240, w83792d and w83793 hardware monitoring drivers have diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index f584aba78ca9..57b31b3d83bd 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -412,21 +412,6 @@ static inline bool __cancel_delayed_work(struct delayed_work *work) return ret; } -/* Obsolete. use cancel_delayed_work_sync() */ -static inline __deprecated -void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq, - struct delayed_work *work) -{ - cancel_delayed_work_sync(work); -} - -/* Obsolete. use cancel_delayed_work_sync() */ -static inline __deprecated -void cancel_rearming_delayed_work(struct delayed_work *work) -{ - cancel_delayed_work_sync(work); -} - #ifndef CONFIG_SMP static inline long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) { -- cgit v1.2.3 From 9c5a2ba70251ecaab18c7a83e38b3c620223476c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 5 Apr 2011 18:01:44 +0200 Subject: workqueue: separate out drain_workqueue() from destroy_workqueue() There are users which want to drain workqueues without destroying it. Separate out drain functionality from destroy_workqueue() into drain_workqueue() and make it accessible to workqueue users. To guarantee forward-progress, only chain queueing is allowed while drain is in progress. If a new work item which isn't chained from the running or pending work items is queued while draining is in progress, WARN_ON_ONCE() is triggered. Signed-off-by: Tejun Heo Cc: James Bottomley --- include/linux/workqueue.h | 3 +- kernel/workqueue.c | 81 +++++++++++++++++++++++++++++++---------------- 2 files changed, 55 insertions(+), 29 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 57b31b3d83bd..2be2887c6958 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -255,7 +255,7 @@ enum { WQ_HIGHPRI = 1 << 4, /* high priority */ WQ_CPU_INTENSIVE = 1 << 5, /* cpu instensive workqueue */ - WQ_DYING = 1 << 6, /* internal: workqueue is dying */ + WQ_DRAINING = 1 << 6, /* internal: workqueue is draining */ WQ_RESCUER = 1 << 7, /* internal: workqueue has rescuer */ WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? 
*/ @@ -355,6 +355,7 @@ extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, struct delayed_work *work, unsigned long delay); extern void flush_workqueue(struct workqueue_struct *wq); +extern void drain_workqueue(struct workqueue_struct *wq); extern void flush_scheduled_work(void); extern int schedule_work(struct work_struct *work); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index e3378e8d3a5c..25c8afeaeae8 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -221,7 +221,7 @@ typedef unsigned long mayday_mask_t; * per-CPU workqueues: */ struct workqueue_struct { - unsigned int flags; /* I: WQ_* flags */ + unsigned int flags; /* W: WQ_* flags */ union { struct cpu_workqueue_struct __percpu *pcpu; struct cpu_workqueue_struct *single; @@ -240,6 +240,7 @@ struct workqueue_struct { mayday_mask_t mayday_mask; /* cpus requesting rescue */ struct worker *rescuer; /* I: rescue worker */ + int nr_drainers; /* W: drain in progress */ int saved_max_active; /* W: saved cwq max_active */ const char *name; /* I: workqueue name */ #ifdef CONFIG_LOCKDEP @@ -990,7 +991,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, debug_work_activate(work); /* if dying, only works from the same workqueue are allowed */ - if (unlikely(wq->flags & WQ_DYING) && + if (unlikely(wq->flags & WQ_DRAINING) && WARN_ON_ONCE(!is_chained_work(wq))) return; @@ -2381,6 +2382,54 @@ out_unlock: } EXPORT_SYMBOL_GPL(flush_workqueue); +/** + * drain_workqueue - drain a workqueue + * @wq: workqueue to drain + * + * Wait until the workqueue becomes empty. While draining is in progress, + * only chain queueing is allowed. IOW, only currently pending or running + * work items on @wq can queue further work items on it. @wq is flushed + * repeatedly until it becomes empty. The number of flushing is detemined + * by the depth of chaining and should be relatively short. Whine if it + * takes too long. + */ +void drain_workqueue(struct workqueue_struct *wq) +{ + unsigned int flush_cnt = 0; + unsigned int cpu; + + /* + * __queue_work() needs to test whether there are drainers, is much + * hotter than drain_workqueue() and already looks at @wq->flags. + * Use WQ_DRAINING so that queue doesn't have to check nr_drainers. + */ + spin_lock(&workqueue_lock); + if (!wq->nr_drainers++) + wq->flags |= WQ_DRAINING; + spin_unlock(&workqueue_lock); +reflush: + flush_workqueue(wq); + + for_each_cwq_cpu(cpu, wq) { + struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); + + if (!cwq->nr_active && list_empty(&cwq->delayed_works)) + continue; + + if (++flush_cnt == 10 || + (flush_cnt % 100 == 0 && flush_cnt <= 1000)) + pr_warning("workqueue %s: flush on destruction isn't complete after %u tries\n", + wq->name, flush_cnt); + goto reflush; + } + + spin_lock(&workqueue_lock); + if (!--wq->nr_drainers) + wq->flags &= ~WQ_DRAINING; + spin_unlock(&workqueue_lock); +} +EXPORT_SYMBOL_GPL(drain_workqueue); + static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr, bool wait_executing) { @@ -3011,34 +3060,10 @@ EXPORT_SYMBOL_GPL(__alloc_workqueue_key); */ void destroy_workqueue(struct workqueue_struct *wq) { - unsigned int flush_cnt = 0; unsigned int cpu; - /* - * Mark @wq dying and drain all pending works. Once WQ_DYING is - * set, only chain queueing is allowed. IOW, only currently - * pending or running work items on @wq can queue further work - * items on it. @wq is flushed repeatedly until it becomes empty. 
- * The number of flushing is detemined by the depth of chaining and - * should be relatively short. Whine if it takes too long. - */ - wq->flags |= WQ_DYING; -reflush: - flush_workqueue(wq); - - for_each_cwq_cpu(cpu, wq) { - struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); - - if (!cwq->nr_active && list_empty(&cwq->delayed_works)) - continue; - - if (++flush_cnt == 10 || - (flush_cnt % 100 == 0 && flush_cnt <= 1000)) - printk(KERN_WARNING "workqueue %s: flush on " - "destruction isn't complete after %u tries\n", - wq->name, flush_cnt); - goto reflush; - } + /* drain it before proceeding with destruction */ + drain_workqueue(wq); /* * wq list is used to freeze wq, remove from list after -- cgit v1.2.3 From 60063497a95e716c9a689af3be2687d261f115b4 Mon Sep 17 00:00:00 2001 From: Arun Sharma Date: Tue, 26 Jul 2011 16:09:06 -0700 Subject: atomic: use This allows us to move duplicated code in (atomic_inc_not_zero() for now) to Signed-off-by: Arun Sharma Reviewed-by: Eric Dumazet Cc: Ingo Molnar Cc: David Miller Cc: Eric Dumazet Acked-by: Mike Frysinger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/atomic.h | 1 - arch/alpha/include/asm/local.h | 2 +- arch/alpha/kernel/perf_event.c | 2 +- arch/alpha/kernel/smp.c | 2 +- arch/alpha/lib/dec_and_lock.c | 2 +- arch/arm/include/asm/atomic.h | 1 - arch/arm/kernel/smp.c | 2 +- arch/arm/kernel/traps.c | 2 +- arch/arm/mach-at91/pm.c | 2 +- arch/arm/mach-bcmring/dma.c | 2 +- arch/arm/mach-cns3xxx/include/mach/pm.h | 2 +- arch/arm/mach-cns3xxx/pm.c | 2 +- arch/arm/mach-omap1/pm.c | 2 +- arch/arm/mach-s3c2440/clock.c | 2 +- arch/arm/mach-s3c2440/s3c2442.c | 2 +- arch/arm/mach-s3c2440/s3c244x-clock.c | 2 +- arch/avr32/include/asm/atomic.h | 1 - arch/blackfin/include/asm/atomic.h | 1 - arch/blackfin/include/asm/dma.h | 2 +- arch/blackfin/include/asm/ipipe.h | 2 +- arch/blackfin/include/asm/spinlock.h | 2 +- arch/blackfin/kernel/ftrace.c | 2 +- arch/blackfin/kernel/ipipe.c | 2 +- arch/blackfin/kernel/nmi.c | 2 +- arch/blackfin/mach-common/smp.c | 2 +- arch/cris/arch-v32/drivers/cryptocop.c | 2 +- arch/cris/arch-v32/kernel/smp.c | 2 +- arch/cris/include/asm/atomic.h | 1 - arch/cris/include/asm/bitops.h | 2 +- arch/cris/kernel/process.c | 2 +- arch/frv/include/asm/atomic.h | 1 - arch/frv/include/asm/hardirq.h | 2 +- arch/frv/kernel/irq.c | 2 +- arch/h8300/include/asm/atomic.h | 1 - arch/ia64/include/asm/atomic.h | 1 - arch/ia64/include/asm/processor.h | 2 +- arch/ia64/include/asm/spinlock.h | 2 +- arch/ia64/kernel/smp.c | 2 +- arch/ia64/kernel/smpboot.c | 2 +- arch/ia64/kernel/uncached.c | 2 +- arch/m32r/include/asm/atomic.h | 1 - arch/m32r/include/asm/mmu_context.h | 2 +- arch/m32r/include/asm/spinlock.h | 2 +- arch/m32r/kernel/smp.c | 2 +- arch/m32r/kernel/traps.c | 2 +- arch/m68k/include/asm/atomic.h | 1 - arch/microblaze/include/asm/mmu_context_mm.h | 2 +- arch/microblaze/include/asm/prom.h | 2 +- arch/mips/include/asm/atomic.h | 1 - arch/mips/include/asm/hw_irq.h | 2 +- arch/mips/include/asm/local.h | 2 +- arch/mips/include/asm/smp.h | 2 +- arch/mips/kernel/irq.c | 2 +- arch/mips/kernel/mips-mt.c | 2 +- arch/mips/kernel/rtlx.c | 2 +- arch/mips/kernel/smp-cmp.c | 2 +- arch/mips/kernel/smp-mt.c | 2 +- arch/mips/kernel/smp.c | 2 +- arch/mips/kernel/smtc-proc.c | 2 +- arch/mips/kernel/smtc.c | 2 +- arch/mips/kernel/sync-r4k.c | 2 +- arch/mips/kernel/vpe.c | 2 +- arch/mips/mipssim/sim_smtc.c | 2 +- arch/mips/sgi-ip27/ip27-nmi.c | 2 +- arch/mn10300/include/asm/atomic.h | 1 - arch/mn10300/include/asm/mmu_context.h | 2 +- 
arch/mn10300/include/asm/spinlock.h | 2 +- arch/mn10300/include/asm/system.h | 2 +- arch/mn10300/kernel/mn10300-watchdog.c | 2 +- arch/mn10300/kernel/traps.c | 2 +- arch/mn10300/mm/misalignment.c | 2 +- arch/mn10300/proc-mn2ws0050/proc-init.c | 2 +- arch/parisc/include/asm/atomic.h | 1 - arch/parisc/include/asm/bitops.h | 2 +- arch/parisc/include/asm/mmu_context.h | 2 +- arch/parisc/kernel/parisc_ksyms.c | 2 +- arch/parisc/kernel/smp.c | 2 +- arch/parisc/kernel/traps.c | 2 +- arch/parisc/lib/bitops.c | 2 +- arch/powerpc/include/asm/atomic.h | 1 - arch/powerpc/include/asm/emulated_ops.h | 2 +- arch/powerpc/include/asm/irq.h | 2 +- arch/powerpc/include/asm/local.h | 2 +- arch/powerpc/include/asm/prom.h | 2 +- arch/powerpc/kernel/of_platform.c | 2 +- arch/powerpc/kernel/ppc_ksyms.c | 2 +- arch/powerpc/kernel/rtas.c | 2 +- arch/powerpc/kernel/rtasd.c | 2 +- arch/powerpc/kernel/smp-tbsync.c | 2 +- arch/powerpc/kernel/smp.c | 2 +- arch/powerpc/platforms/83xx/km83xx.c | 2 +- arch/powerpc/platforms/83xx/mpc832x_mds.c | 2 +- arch/powerpc/platforms/83xx/mpc834x_itx.c | 2 +- arch/powerpc/platforms/83xx/mpc834x_mds.c | 2 +- arch/powerpc/platforms/83xx/mpc836x_mds.c | 2 +- arch/powerpc/platforms/83xx/sbc834x.c | 2 +- arch/powerpc/platforms/85xx/mpc85xx_cds.c | 2 +- arch/powerpc/platforms/85xx/mpc85xx_mds.c | 2 +- arch/powerpc/platforms/85xx/sbc8548.c | 2 +- arch/powerpc/platforms/cell/cpufreq_spudemand.c | 2 +- arch/powerpc/platforms/cell/smp.c | 2 +- arch/powerpc/platforms/cell/spufs/context.c | 2 +- arch/powerpc/platforms/chrp/smp.c | 2 +- arch/powerpc/platforms/iseries/smp.c | 2 +- arch/powerpc/platforms/powermac/backlight.c | 2 +- arch/powerpc/platforms/powermac/smp.c | 2 +- arch/powerpc/platforms/pseries/eeh.c | 2 +- arch/powerpc/platforms/pseries/eeh_cache.c | 2 +- arch/powerpc/platforms/pseries/smp.c | 2 +- arch/powerpc/sysdev/fsl_soc.c | 2 +- arch/powerpc/sysdev/tsi108_dev.c | 2 +- arch/s390/include/asm/atomic.h | 1 - arch/s390/kernel/dis.c | 2 +- arch/s390/kernel/traps.c | 2 +- arch/sh/include/asm/atomic.h | 1 - arch/sh/include/asm/hw_irq.h | 2 +- arch/sh/include/asm/smp.h | 2 +- arch/sh/kernel/idle.c | 2 +- arch/sh/kernel/smp.c | 2 +- arch/sh/kernel/traps_64.c | 2 +- arch/sh/kernel/unwinder.c | 2 +- arch/sparc/include/asm/atomic_32.h | 1 - arch/sparc/include/asm/atomic_64.h | 1 - arch/sparc/include/asm/prom.h | 2 +- arch/sparc/include/asm/smp_32.h | 2 +- arch/sparc/include/asm/smp_64.h | 2 +- arch/sparc/kernel/irq_64.c | 2 +- arch/sparc/kernel/leon_smp.c | 2 +- arch/sparc/kernel/perf_event.c | 2 +- arch/sparc/kernel/smp_32.c | 2 +- arch/sparc/kernel/smp_64.c | 2 +- arch/sparc/lib/atomic32.c | 2 +- arch/tile/include/asm/atomic.h | 9 --------- arch/tile/include/asm/atomic_32.h | 4 ++-- arch/tile/include/asm/atomic_64.h | 2 +- arch/tile/include/asm/bitops_32.h | 2 +- arch/tile/include/asm/bitops_64.h | 2 +- arch/tile/include/asm/spinlock_32.h | 2 +- arch/tile/kernel/intvec_32.S | 2 +- arch/tile/lib/atomic_32.c | 2 +- arch/tile/lib/atomic_asm_32.S | 2 +- arch/x86/ia32/sys_ia32.c | 2 +- arch/x86/include/asm/apic.h | 2 +- arch/x86/include/asm/atomic.h | 1 - arch/x86/include/asm/hw_irq.h | 2 +- arch/x86/include/asm/local.h | 2 +- arch/x86/include/asm/mce.h | 2 +- arch/x86/include/asm/mmu_context.h | 2 +- arch/x86/include/asm/prom.h | 2 +- arch/x86/include/asm/spinlock.h | 2 +- arch/x86/include/asm/thread_info.h | 2 +- arch/x86/kernel/amd_gart_64.c | 2 +- arch/x86/kernel/apic/apic.c | 2 +- arch/x86/kernel/apic/es7000_32.c | 2 +- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/i8259.c | 2 +- 
arch/x86/kernel/irqinit.c | 2 +- arch/x86/kernel/traps.c | 2 +- arch/x86/kvm/lapic.c | 2 +- arch/x86/kvm/timer.c | 2 +- arch/x86/lib/atomic64_32.c | 2 +- arch/x86/mm/mmio-mod.c | 2 +- arch/xtensa/include/asm/atomic.h | 1 - arch/xtensa/kernel/process.c | 2 +- crypto/af_alg.c | 2 +- crypto/proc.c | 2 +- crypto/rng.c | 2 +- drivers/atm/ambassador.c | 2 +- drivers/atm/atmtcp.c | 2 +- drivers/atm/eni.c | 2 +- drivers/atm/eni.h | 2 +- drivers/atm/firestream.c | 2 +- drivers/atm/fore200e.c | 2 +- drivers/atm/horizon.c | 2 +- drivers/atm/idt77252.c | 2 +- drivers/atm/iphase.c | 2 +- drivers/atm/nicstar.c | 2 +- drivers/atm/suni.c | 2 +- drivers/atm/uPD98402.c | 2 +- drivers/atm/zatm.c | 2 +- drivers/base/memory.c | 2 +- drivers/base/power/sysfs.c | 2 +- drivers/block/cciss_scsi.c | 2 +- drivers/char/ipmi/ipmi_watchdog.c | 2 +- drivers/char/mspec.c | 2 +- drivers/connector/cn_proc.c | 3 ++- drivers/edac/edac_stub.c | 2 +- drivers/firewire/core-card.c | 2 +- drivers/firewire/core-device.c | 2 +- drivers/firewire/core-topology.c | 2 +- drivers/firewire/core.h | 2 +- drivers/firewire/nosy.c | 2 +- drivers/gpu/drm/radeon/radeon.h | 2 +- drivers/gpu/drm/radeon/radeon_fence.c | 2 +- drivers/gpu/drm/ttm/ttm_bo.c | 2 +- drivers/gpu/drm/ttm/ttm_lock.c | 2 +- drivers/gpu/drm/ttm/ttm_object.c | 2 +- drivers/gpu/drm/ttm/ttm_page_alloc.c | 2 +- drivers/hwmon/sht15.c | 2 +- drivers/infiniband/hw/cxgb4/mem.c | 2 +- drivers/infiniband/hw/ehca/ehca_tools.h | 2 +- drivers/infiniband/hw/nes/nes_cm.c | 2 +- drivers/infiniband/ulp/ipoib/ipoib.h | 2 +- drivers/infiniband/ulp/srp/ib_srp.c | 2 +- drivers/isdn/gigaset/gigaset.h | 2 +- drivers/md/dm-crypt.c | 2 +- drivers/md/dm-kcopyd.c | 2 +- drivers/md/dm-mpath.c | 2 +- drivers/md/dm-queue-length.c | 2 +- drivers/md/dm-table.c | 2 +- drivers/media/video/hdpvr/hdpvr-core.c | 2 +- drivers/media/video/tlg2300/pd-dvb.c | 2 +- drivers/media/video/uvc/uvc_ctrl.c | 2 +- drivers/media/video/uvc/uvc_queue.c | 2 +- drivers/media/video/uvc/uvc_v4l2.c | 2 +- drivers/media/video/uvc/uvc_video.c | 2 +- drivers/message/i2o/i2o_scsi.c | 2 +- drivers/misc/phantom.c | 2 +- drivers/net/atlx/atl1.c | 2 +- drivers/net/atlx/atl2.c | 2 +- drivers/net/atlx/atl2.h | 2 +- drivers/net/cassini.c | 2 +- drivers/net/cpmac.c | 2 +- drivers/net/cxgb3/cxgb3_offload.c | 2 +- drivers/net/cxgb3/l2t.h | 2 +- drivers/net/cxgb3/t3cdev.h | 2 +- drivers/net/cxgb4/cxgb4_uld.h | 2 +- drivers/net/cxgb4/l2t.h | 2 +- drivers/net/hamradio/6pack.c | 2 +- drivers/net/hamradio/dmascc.c | 2 +- drivers/net/ibmveth.c | 2 +- drivers/net/phy/phy.c | 2 +- drivers/net/ppp_generic.c | 2 +- drivers/net/wimax/i2400m/i2400m.h | 2 +- drivers/net/wireless/b43legacy/b43legacy.h | 2 +- drivers/net/wireless/b43legacy/dma.h | 2 +- drivers/oprofile/oprofile_stats.h | 2 +- drivers/pci/hotplug/cpci_hotplug_core.c | 2 +- drivers/pci/xen-pcifront.c | 2 +- drivers/s390/block/dasd_eer.c | 2 +- drivers/s390/char/sclp_quiesce.c | 2 +- drivers/s390/char/vmlogrdr.c | 2 +- drivers/s390/cio/device.h | 2 +- drivers/s390/cio/qdio_main.c | 2 +- drivers/s390/cio/qdio_thinint.c | 2 +- drivers/s390/crypto/ap_bus.c | 2 +- drivers/s390/crypto/zcrypt_api.c | 2 +- drivers/s390/crypto/zcrypt_cex2a.c | 2 +- drivers/s390/crypto/zcrypt_mono.c | 2 +- drivers/s390/crypto/zcrypt_pcica.c | 2 +- drivers/s390/crypto/zcrypt_pcicc.c | 2 +- drivers/s390/crypto/zcrypt_pcixcc.c | 2 +- drivers/s390/net/fsm.h | 2 +- drivers/s390/scsi/zfcp_scsi.c | 2 +- drivers/sbus/char/display7seg.c | 2 +- drivers/scsi/dpt/dpti_i2o.h | 2 +- drivers/scsi/hpsa.c | 2 +- 
drivers/scsi/pm8001/pm8001_sas.h | 2 +- drivers/staging/octeon/ethernet-rx.c | 2 +- drivers/staging/octeon/ethernet-tx.c | 2 +- drivers/staging/solo6x10/solo6x10.h | 2 +- drivers/staging/tidspbridge/include/dspbridge/host_os.h | 2 +- drivers/staging/winbond/mds_s.h | 2 +- drivers/staging/winbond/wb35reg_s.h | 2 +- drivers/tty/bfin_jtag_comm.c | 2 +- drivers/tty/rocket.c | 2 +- drivers/tty/serial/dz.c | 2 +- drivers/tty/serial/sb1250-duart.c | 2 +- drivers/tty/serial/zs.c | 2 +- drivers/usb/gadget/f_audio.c | 2 +- drivers/usb/gadget/f_rndis.c | 2 +- drivers/usb/gadget/uvc_queue.c | 2 +- drivers/usb/image/microtek.c | 2 +- drivers/usb/misc/appledisplay.c | 2 +- drivers/usb/serial/garmin_gps.c | 2 +- drivers/usb/wusbcore/wa-rpipe.c | 2 +- drivers/vhost/vhost.h | 2 +- drivers/video/sh_mobile_lcdcfb.c | 2 +- drivers/video/vermilion/vermilion.h | 2 +- drivers/w1/masters/matrox_w1.c | 2 +- drivers/w1/w1.c | 2 +- drivers/w1/w1_family.h | 2 +- drivers/watchdog/intel_scu_watchdog.c | 2 +- drivers/watchdog/sbc7240_wdt.c | 2 +- fs/btrfs/delayed-inode.h | 2 +- fs/direct-io.c | 2 +- fs/eventpoll.c | 2 +- fs/file_table.c | 2 +- fs/gfs2/main.c | 2 +- fs/nfs/cache_lib.h | 2 +- fs/nfs/direct.c | 2 +- fs/notify/group.c | 2 +- fs/notify/inode_mark.c | 2 +- fs/notify/mark.c | 2 +- fs/notify/notification.c | 2 +- fs/notify/vfsmount_mark.c | 2 +- fs/ntfs/inode.h | 2 +- fs/posix_acl.c | 2 +- fs/proc/meminfo.c | 2 +- include/acpi/platform/aclinux.h | 2 +- include/asm-generic/atomic.h | 2 -- include/asm-generic/local.h | 2 +- include/asm-generic/local64.h | 2 +- include/drm/ttm/ttm_lock.h | 2 +- include/linux/aio.h | 2 +- include/linux/atmdev.h | 2 +- include/linux/atomic.h | 9 +++++++++ include/linux/backing-dev.h | 2 +- include/linux/bit_spinlock.h | 2 +- include/linux/buffer_head.h | 2 +- include/linux/configfs.h | 2 +- include/linux/connector.h | 2 +- include/linux/cred.h | 2 +- include/linux/crypto.h | 2 +- include/linux/dcache.h | 2 +- include/linux/debug_locks.h | 2 +- include/linux/device.h | 2 +- include/linux/edac.h | 2 +- include/linux/fault-inject.h | 2 +- include/linux/fdtable.h | 2 +- include/linux/filter.h | 2 +- include/linux/firewire.h | 2 +- include/linux/fsnotify_backend.h | 2 +- include/linux/interrupt.h | 2 +- include/linux/jump_label.h | 2 +- include/linux/kdb.h | 2 +- include/linux/key.h | 2 +- include/linux/kgdb.h | 2 +- include/linux/kobject.h | 2 +- include/linux/mlx4/device.h | 2 +- include/linux/mman.h | 2 +- include/linux/mmzone.h | 2 +- include/linux/mount.h | 2 +- include/linux/mutex.h | 2 +- include/linux/netdevice.h | 2 +- include/linux/nfs_fs_sb.h | 2 +- include/linux/oprofile.h | 2 +- include/linux/pci.h | 2 +- include/linux/perf_event.h | 2 +- include/linux/phy.h | 2 +- include/linux/proc_fs.h | 2 +- include/linux/quota.h | 2 +- include/linux/rwsem.h | 2 +- include/linux/sem.h | 2 +- include/linux/skbuff.h | 2 +- include/linux/sonet.h | 2 +- include/linux/spinlock.h | 2 +- include/linux/sunrpc/auth.h | 2 +- include/linux/sunrpc/cache.h | 2 +- include/linux/sunrpc/timer.h | 2 +- include/linux/swap.h | 2 +- include/linux/sysfs.h | 2 +- include/linux/vmstat.h | 2 +- include/linux/workqueue.h | 2 +- include/net/ax25.h | 2 +- include/net/cipso_ipv4.h | 2 +- include/net/flow.h | 2 +- include/net/inet_hashtables.h | 2 +- include/net/inet_timewait_sock.h | 2 +- include/net/inetpeer.h | 2 +- include/net/ip_vs.h | 2 +- include/net/lib80211.h | 2 +- include/net/llc.h | 2 +- include/net/neighbour.h | 2 +- include/net/net_namespace.h | 2 +- include/net/netfilter/nf_conntrack.h | 2 +- 
include/net/netlabel.h | 2 +- include/net/netns/conntrack.h | 2 +- include/net/sctp/structs.h | 2 +- include/pcmcia/ds.h | 2 +- include/rdma/ib_sa.h | 2 +- include/rdma/ib_verbs.h | 2 +- include/rxrpc/types.h | 2 +- include/scsi/scsi_device.h | 2 +- kernel/audit.c | 2 +- kernel/auditsc.c | 2 +- kernel/cgroup.c | 2 +- kernel/cpuset.c | 2 +- kernel/debug/debug_core.c | 2 +- kernel/rcupdate.c | 2 +- kernel/rcutorture.c | 2 +- kernel/rcutree_trace.c | 2 +- kernel/rwsem.c | 2 +- kernel/stop_machine.c | 2 +- kernel/taskstats.c | 2 +- kernel/trace/trace.h | 2 +- kernel/trace/trace_mmiotrace.c | 2 +- lib/atomic64.c | 2 +- lib/atomic64_test.c | 2 +- lib/crc32.c | 2 +- lib/dec_and_lock.c | 2 +- mm/init-mm.c | 2 +- mm/kmemleak.c | 2 +- mm/slob.c | 2 +- mm/vmalloc.c | 2 +- net/atm/atm_misc.c | 2 +- net/atm/clip.c | 2 +- net/atm/common.c | 2 +- net/atm/lec.c | 2 +- net/atm/proc.c | 2 +- net/bridge/br_fdb.c | 2 +- net/core/flow.c | 2 +- net/decnet/dn_fib.c | 2 +- net/decnet/dn_neigh.c | 2 +- net/decnet/dn_table.c | 2 +- net/decnet/dn_timer.c | 2 +- net/ipv4/cipso_ipv4.c | 2 +- net/ipv4/raw.c | 2 +- net/ipv6/ip6_tunnel.c | 2 +- net/iucv/iucv.c | 2 +- net/l2tp/l2tp_core.c | 2 +- net/l2tp/l2tp_ppp.c | 2 +- net/netfilter/nfnetlink_log.c | 2 +- net/netfilter/nfnetlink_queue.c | 2 +- net/netlabel/netlabel_cipso_v4.c | 2 +- net/netlabel/netlabel_kapi.c | 2 +- net/netlabel/netlabel_mgmt.c | 2 +- net/netlabel/netlabel_mgmt.h | 2 +- net/netlabel/netlabel_unlabeled.c | 2 +- net/sunrpc/xprtrdma/xprt_rdma.h | 2 +- net/tipc/core.h | 2 +- security/selinux/hooks.c | 2 +- security/selinux/xfrm.c | 2 +- sound/pci/echoaudio/darla20.c | 2 +- sound/pci/echoaudio/darla24.c | 2 +- sound/pci/echoaudio/echo3g.c | 2 +- sound/pci/echoaudio/gina20.c | 2 +- sound/pci/echoaudio/gina24.c | 2 +- sound/pci/echoaudio/indigo.c | 2 +- sound/pci/echoaudio/indigodj.c | 2 +- sound/pci/echoaudio/indigodjx.c | 2 +- sound/pci/echoaudio/indigoio.c | 2 +- sound/pci/echoaudio/indigoiox.c | 2 +- sound/pci/echoaudio/layla20.c | 2 +- sound/pci/echoaudio/layla24.c | 2 +- sound/pci/echoaudio/mia.c | 2 +- sound/pci/echoaudio/mona.c | 2 +- sound/pci/lx6464es/lx6464es.h | 2 +- sound/sparc/dbri.c | 2 +- 439 files changed, 427 insertions(+), 448 deletions(-) (limited to 'include/linux/workqueue.h') diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h index e756d04b6cd5..88b7491490bc 100644 --- a/arch/alpha/include/asm/atomic.h +++ b/arch/alpha/include/asm/atomic.h @@ -199,7 +199,6 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) return c != (u); } -#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) /** * atomic64_add_unless - add unless the number is a given value diff --git a/arch/alpha/include/asm/local.h b/arch/alpha/include/asm/local.h index b9e3e3318371..9c94b8456043 100644 --- a/arch/alpha/include/asm/local.h +++ b/arch/alpha/include/asm/local.h @@ -2,7 +2,7 @@ #define _ALPHA_LOCAL_H #include -#include +#include typedef struct { diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index 8e47709160f8..8143cd7cdbfb 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -17,7 +17,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index d739703608fc..4087a569b43b 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -31,7 +31,7 @@ #include #include -#include +#include #include #include diff --git a/arch/alpha/lib/dec_and_lock.c 
b/arch/alpha/lib/dec_and_lock.c index 0f5520d2f45f..f9f5fe830e9f 100644 --- a/arch/alpha/lib/dec_and_lock.c +++ b/arch/alpha/lib/dec_and_lock.c @@ -6,7 +6,7 @@ */ #include -#include +#include asm (".text \n\ .global _atomic_dec_and_lock \n\ diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h index 7e79503ab89b..4d501f1bdc9d 100644 --- a/arch/arm/include/asm/atomic.h +++ b/arch/arm/include/asm/atomic.h @@ -217,7 +217,6 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) c = old; return c != u; } -#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) #define atomic_inc(v) atomic_add(1, v) #define atomic_dec(v) atomic_sub(1, v) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 167e3cbe1f2f..d88ff0230e82 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -27,7 +27,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 2d3436e9f71f..bc9f9da782cb 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -25,7 +25,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index ea53f4d9b283..4159eca78945 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include diff --git a/arch/arm/mach-bcmring/dma.c b/arch/arm/mach-bcmring/dma.c index 9f2a948e0e72..0ca00050666a 100644 --- a/arch/arm/mach-bcmring/dma.c +++ b/arch/arm/mach-bcmring/dma.c @@ -34,7 +34,7 @@ #include #include -#include +#include #include /* I don't quite understand why dc4 fails when this is set to 1 and DMA is enabled */ diff --git a/arch/arm/mach-cns3xxx/include/mach/pm.h b/arch/arm/mach-cns3xxx/include/mach/pm.h index 6eae7f764d1d..c2588cc991d1 100644 --- a/arch/arm/mach-cns3xxx/include/mach/pm.h +++ b/arch/arm/mach-cns3xxx/include/mach/pm.h @@ -11,7 +11,7 @@ #ifndef __CNS3XXX_PM_H #define __CNS3XXX_PM_H -#include +#include void cns3xxx_pwr_clk_en(unsigned int block); void cns3xxx_pwr_clk_dis(unsigned int block); diff --git a/arch/arm/mach-cns3xxx/pm.c b/arch/arm/mach-cns3xxx/pm.c index 5e579552aa54..0c04678615ce 100644 --- a/arch/arm/mach-cns3xxx/pm.c +++ b/arch/arm/mach-cns3xxx/pm.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-omap1/pm.c b/arch/arm/mach-omap1/pm.c index 98ba9784aa15..495b3987d461 100644 --- a/arch/arm/mach-omap1/pm.c +++ b/arch/arm/mach-omap1/pm.c @@ -44,7 +44,7 @@ #include #include -#include +#include #include #include diff --git a/arch/arm/mach-s3c2440/clock.c b/arch/arm/mach-s3c2440/clock.c index 554e0d3ec70b..f9e6bdaf41d2 100644 --- a/arch/arm/mach-s3c2440/clock.c +++ b/arch/arm/mach-s3c2440/clock.c @@ -36,7 +36,7 @@ #include #include -#include +#include #include #include diff --git a/arch/arm/mach-s3c2440/s3c2442.c b/arch/arm/mach-s3c2440/s3c2442.c index 6224bad4d604..9ad99f8016a1 100644 --- a/arch/arm/mach-s3c2440/s3c2442.c +++ b/arch/arm/mach-s3c2440/s3c2442.c @@ -38,7 +38,7 @@ #include #include -#include +#include #include #include diff --git a/arch/arm/mach-s3c2440/s3c244x-clock.c b/arch/arm/mach-s3c2440/s3c244x-clock.c index f8d96130d1d1..7f5ea0a169a5 100644 --- a/arch/arm/mach-s3c2440/s3c244x-clock.c +++ b/arch/arm/mach-s3c2440/s3c244x-clock.c @@ -35,7 +35,7 @@ #include #include -#include +#include #include #include diff --git a/arch/avr32/include/asm/atomic.h b/arch/avr32/include/asm/atomic.h index 
bbce6a1c6bb6..f229c3849f03 100644 --- a/arch/avr32/include/asm/atomic.h +++ b/arch/avr32/include/asm/atomic.h @@ -188,7 +188,6 @@ static inline int atomic_sub_if_positive(int i, atomic_t *v) #define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0) #define atomic_add_negative(i, v) (atomic_add_return(i, v) < 0) -#define atomic_inc_not_zero(v) atomic_add_unless(v, 1, 0) #define atomic_dec_if_positive(v) atomic_sub_if_positive(1, v) #define smp_mb__before_atomic_dec() barrier() diff --git a/arch/blackfin/include/asm/atomic.h b/arch/blackfin/include/asm/atomic.h index 4c707dbe1ff9..f2cf5b714ea4 100644 --- a/arch/blackfin/include/asm/atomic.h +++ b/arch/blackfin/include/asm/atomic.h @@ -97,7 +97,6 @@ static inline void atomic_set_mask(int mask, atomic_t *v) c = old; \ c != (u); \ }) -#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) /* * atomic_inc_and_test - increment and test diff --git a/arch/blackfin/include/asm/dma.h b/arch/blackfin/include/asm/dma.h index d9dbc1a53534..dac0c97242bb 100644 --- a/arch/blackfin/include/asm/dma.h +++ b/arch/blackfin/include/asm/dma.h @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/blackfin/include/asm/ipipe.h b/arch/blackfin/include/asm/ipipe.h index 9e0cc0e2534f..17b5e92e3bc6 100644 --- a/arch/blackfin/include/asm/ipipe.h +++ b/arch/blackfin/include/asm/ipipe.h @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/blackfin/include/asm/spinlock.h b/arch/blackfin/include/asm/spinlock.h index 2336093fca23..490c7caa02d9 100644 --- a/arch/blackfin/include/asm/spinlock.h +++ b/arch/blackfin/include/asm/spinlock.h @@ -11,7 +11,7 @@ # include #else -#include +#include asmlinkage int __raw_spin_is_locked_asm(volatile int *ptr); asmlinkage void __raw_spin_lock_asm(volatile int *ptr); diff --git a/arch/blackfin/kernel/ftrace.c b/arch/blackfin/kernel/ftrace.c index 48808a12b427..9277905b82cf 100644 --- a/arch/blackfin/kernel/ftrace.c +++ b/arch/blackfin/kernel/ftrace.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #ifdef CONFIG_DYNAMIC_FTRACE diff --git a/arch/blackfin/kernel/ipipe.c b/arch/blackfin/kernel/ipipe.c index 486426f8a0d7..dbe11220cc53 100644 --- a/arch/blackfin/kernel/ipipe.c +++ b/arch/blackfin/kernel/ipipe.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include DEFINE_PER_CPU(struct pt_regs, __ipipe_tick_regs); diff --git a/arch/blackfin/kernel/nmi.c b/arch/blackfin/kernel/nmi.c index 679d0db35256..9919d29287dc 100644 --- a/arch/blackfin/kernel/nmi.c +++ b/arch/blackfin/kernel/nmi.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c index 1c143a4de5f5..107622aacf6b 100644 --- a/arch/blackfin/mach-common/smp.c +++ b/arch/blackfin/mach-common/smp.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/cris/arch-v32/drivers/cryptocop.c b/arch/cris/arch-v32/drivers/cryptocop.c index c03bc3bc30c2..642c6fed43d7 100644 --- a/arch/cris/arch-v32/drivers/cryptocop.c +++ b/arch/cris/arch-v32/drivers/cryptocop.c @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include diff --git a/arch/cris/arch-v32/kernel/smp.c b/arch/cris/arch-v32/kernel/smp.c index a0843a71aaee..0b99df72d2a4 100644 --- a/arch/cris/arch-v32/kernel/smp.c +++ b/arch/cris/arch-v32/kernel/smp.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include 
#include diff --git a/arch/cris/include/asm/atomic.h b/arch/cris/include/asm/atomic.h index 88dc9b9c4ba0..ce9f67e4d977 100644 --- a/arch/cris/include/asm/atomic.h +++ b/arch/cris/include/asm/atomic.h @@ -150,7 +150,6 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) cris_atomic_restore(v, flags); return ret != u; } -#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) /* Atomic operations are already serializing */ #define smp_mb__before_atomic_dec() barrier() diff --git a/arch/cris/include/asm/bitops.h b/arch/cris/include/asm/bitops.h index c0092fc7d846..a78a2d70cd8b 100644 --- a/arch/cris/include/asm/bitops.h +++ b/arch/cris/include/asm/bitops.h @@ -20,7 +20,7 @@ #include #include -#include +#include #include /* diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c index c99aeab7cef7..aa585e4e979e 100644 --- a/arch/cris/kernel/process.c +++ b/arch/cris/kernel/process.c @@ -12,7 +12,7 @@ * This file handles the architecture-dependent parts of process handling.. */ -#include +#include #include #include #include diff --git a/arch/frv/include/asm/atomic.h b/arch/frv/include/asm/atomic.h index fae32c7fdcb6..b07b75f411f2 100644 --- a/arch/frv/include/asm/atomic.h +++ b/arch/frv/include/asm/atomic.h @@ -256,7 +256,6 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) return c != (u); } -#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) #include #endif /* _ASM_ATOMIC_H */ diff --git a/arch/frv/include/asm/hardirq.h b/arch/frv/include/asm/hardirq.h index 5fc8b6f5bc55..c62833d6ebbb 100644 --- a/arch/frv/include/asm/hardirq.h +++ b/arch/frv/include/asm/hardirq.h @@ -12,7 +12,7 @@ #ifndef __ASM_HARDIRQ_H #define __ASM_HARDIRQ_H -#include +#include extern atomic_t irq_err_count; static inline void ack_bad_irq(int irq) diff --git a/arch/frv/kernel/irq.c b/arch/frv/kernel/irq.c index a5f624a9f559..3facbc28cbbc 100644 --- a/arch/frv/kernel/irq.c +++ b/arch/frv/kernel/irq.c @@ -25,7 +25,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/h8300/include/asm/atomic.h b/arch/h8300/include/asm/atomic.h index 984221abb66d..b641714774ea 100644 --- a/arch/h8300/include/asm/atomic.h +++ b/arch/h8300/include/asm/atomic.h @@ -116,7 +116,6 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) local_irq_restore(flags); return ret != u; } -#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) static __inline__ void atomic_clear_mask(unsigned long mask, unsigned long *v) { diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h index 446881439675..fdb887005dff 100644 --- a/arch/ia64/include/asm/atomic.h +++ b/arch/ia64/include/asm/atomic.h @@ -105,7 +105,6 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) return c != (u); } -#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) static __inline__ long atomic64_add_unless(atomic64_t *v, long a, long u) { diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h index 03afe7970748..d9f397fae03e 100644 --- a/arch/ia64/include/asm/processor.h +++ b/arch/ia64/include/asm/processor.h @@ -75,7 +75,7 @@ #include #include #include -#include +#include #ifdef CONFIG_NUMA #include #endif diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h index 1a91c9121d17..b77768d35f93 100644 --- a/arch/ia64/include/asm/spinlock.h +++ b/arch/ia64/include/asm/spinlock.h @@ -13,7 +13,7 @@ #include #include -#include +#include #include #include diff --git a/arch/ia64/kernel/smp.c 
b/arch/ia64/kernel/smp.c index be450a3e9871..0bd537b4ea6b 100644 --- a/arch/ia64/kernel/smp.c +++ b/arch/ia64/kernel/smp.c @@ -32,7 +32,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 14ec641003da..559097986672 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -40,7 +40,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c index c4696d217ce0..6a867dc45c05 100644 --- a/arch/ia64/kernel/uncached.c +++ b/arch/ia64/kernel/uncached.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/m32r/include/asm/atomic.h b/arch/m32r/include/asm/atomic.h index d44a51e5271b..d64d894dc549 100644 --- a/arch/m32r/include/asm/atomic.h +++ b/arch/m32r/include/asm/atomic.h @@ -262,7 +262,6 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) return c != (u); } -#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) static __inline__ void atomic_clear_mask(unsigned long mask, atomic_t *addr) { diff --git a/arch/m32r/include/asm/mmu_context.h b/arch/m32r/include/asm/mmu_context.h index a70a3df33635..a979a4198168 100644 --- a/arch/m32r/include/asm/mmu_context.h +++ b/arch/m32r/include/asm/mmu_context.h @@ -11,7 +11,7 @@ #ifndef __ASSEMBLY__ -#include +#include #include #include #include diff --git a/arch/m32r/include/asm/spinlock.h b/arch/m32r/include/asm/spinlock.h index 179a06489b10..b0ea2f26da3b 100644 --- a/arch/m32r/include/asm/spinlock.h +++ b/arch/m32r/include/asm/spinlock.h @@ -10,7 +10,7 @@ */ #include -#include +#include #include /* diff --git a/arch/m32r/kernel/smp.c b/arch/m32r/kernel/smp.c index 092d40a6708e..ce7aea34fdf4 100644 --- a/arch/m32r/kernel/smp.c +++ b/arch/m32r/kernel/smp.c @@ -26,7 +26,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/m32r/kernel/traps.c b/arch/m32r/kernel/traps.c index fbd109031df3..ee6a9199561c 100644 --- a/arch/m32r/kernel/traps.c +++ b/arch/m32r/kernel/traps.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include diff --git a/arch/m68k/include/asm/atomic.h b/arch/m68k/include/asm/atomic.h index 307a573881ad..e844a2d2ba23 100644 --- a/arch/m68k/include/asm/atomic.h +++ b/arch/m68k/include/asm/atomic.h @@ -198,7 +198,6 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) return c != (u); } -#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) /* Atomic operations are already serializing */ #define smp_mb__before_atomic_dec() barrier() diff --git a/arch/microblaze/include/asm/mmu_context_mm.h b/arch/microblaze/include/asm/mmu_context_mm.h index 3e5c254e8d1c..d68647746448 100644 --- a/arch/microblaze/include/asm/mmu_context_mm.h +++ b/arch/microblaze/include/asm/mmu_context_mm.h @@ -11,7 +11,7 @@ #ifndef _ASM_MICROBLAZE_MMU_CONTEXT_H #define _ASM_MICROBLAZE_MMU_CONTEXT_H -#include +#include #include #include #include diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h index 9bd01ecb00d6..9ad567e2d425 100644 --- a/arch/microblaze/include/asm/prom.h +++ b/arch/microblaze/include/asm/prom.h @@ -21,7 +21,7 @@ #include #include -#include +#include #define HAVE_ARCH_DEVTREE_FIXUPS diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index 4a02fe891ab6..833a4023648a 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -325,7 +325,6 @@ 
static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) } return c != (u); } -#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) #define atomic_dec_return(v) atomic_sub_return(1, (v)) #define atomic_inc_return(v) atomic_add_return(1, (v)) diff --git a/arch/mips/include/asm/hw_irq.h b/arch/mips/include/asm/hw_irq.h index 77adda297ad9..9e8ef5994c9c 100644 --- a/arch/mips/include/asm/hw_irq.h +++ b/arch/mips/include/asm/hw_irq.h @@ -8,7 +8,7 @@ #ifndef __ASM_HW_IRQ_H #define __ASM_HW_IRQ_H -#include +#include extern atomic_t irq_err_count; diff --git a/arch/mips/include/asm/local.h b/arch/mips/include/asm/local.h index fffc8307a80a..94fde8d0fac1 100644 --- a/arch/mips/include/asm/local.h +++ b/arch/mips/include/asm/local.h @@ -3,7 +3,7 @@ #include #include -#include +#include #include #include diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h index af42385245d5..d4fb4d852a6d 100644 --- a/arch/mips/include/asm/smp.h +++ b/arch/mips/include/asm/smp.h @@ -17,7 +17,7 @@ #include #include -#include +#include #include extern int smp_num_siblings; diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c index 9b734d74ae8e..b53970d80991 100644 --- a/arch/mips/kernel/irq.c +++ b/arch/mips/kernel/irq.c @@ -23,7 +23,7 @@ #include #include -#include +#include #include #include diff --git a/arch/mips/kernel/mips-mt.c b/arch/mips/kernel/mips-mt.c index b2259e7cd829..594ca69cb867 100644 --- a/arch/mips/kernel/mips-mt.c +++ b/arch/mips/kernel/mips-mt.c @@ -12,7 +12,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/mips/kernel/rtlx.c b/arch/mips/kernel/rtlx.c index 557ef72472e0..7a80b7cda7cc 100644 --- a/arch/mips/kernel/rtlx.c +++ b/arch/mips/kernel/rtlx.c @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/kernel/smp-cmp.c b/arch/mips/kernel/smp-cmp.c index cc81771b882c..fe3095160655 100644 --- a/arch/mips/kernel/smp-cmp.c +++ b/arch/mips/kernel/smp-cmp.c @@ -25,7 +25,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c index 1ec56e635d04..ce9e286f0a74 100644 --- a/arch/mips/kernel/smp-mt.c +++ b/arch/mips/kernel/smp-mt.c @@ -24,7 +24,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 32a256101082..32c1e954cd37 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -34,7 +34,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/mips/kernel/smtc-proc.c b/arch/mips/kernel/smtc-proc.c index fe256559c997..928a5a61e1a6 100644 --- a/arch/mips/kernel/smtc-proc.c +++ b/arch/mips/kernel/smtc-proc.c @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c index cedac4633741..f0895e70e283 100644 --- a/arch/mips/kernel/smtc.c +++ b/arch/mips/kernel/smtc.c @@ -30,7 +30,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/mips/kernel/sync-r4k.c b/arch/mips/kernel/sync-r4k.c index 05dd170a83f7..99f913c8d7a6 100644 --- a/arch/mips/kernel/sync-r4k.c +++ b/arch/mips/kernel/sync-r4k.c @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c index dbb6b408f001..2cd50ad0d5c6 100644 --- a/arch/mips/kernel/vpe.c +++ b/arch/mips/kernel/vpe.c @@ -46,7 +46,7 @@ #include #include 
#include -#include +#include #include #include #include diff --git a/arch/mips/mipssim/sim_smtc.c b/arch/mips/mipssim/sim_smtc.c index 30df47258c2c..915063991f6e 100644 --- a/arch/mips/mipssim/sim_smtc.c +++ b/arch/mips/mipssim/sim_smtc.c @@ -24,7 +24,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/mips/sgi-ip27/ip27-nmi.c b/arch/mips/sgi-ip27/ip27-nmi.c index bc4fa8dd67f3..005c29ed419a 100644 --- a/arch/mips/sgi-ip27/ip27-nmi.c +++ b/arch/mips/sgi-ip27/ip27-nmi.c @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mn10300/include/asm/atomic.h b/arch/mn10300/include/asm/atomic.h index 9d773a639513..041b9d69d86c 100644 --- a/arch/mn10300/include/asm/atomic.h +++ b/arch/mn10300/include/asm/atomic.h @@ -269,7 +269,6 @@ static inline void atomic_dec(atomic_t *v) c != (u); \ }) -#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) /** * atomic_clear_mask - Atomically clear bits in memory diff --git a/arch/mn10300/include/asm/mmu_context.h b/arch/mn10300/include/asm/mmu_context.h index c8f6c82672ad..c67c2b5365a6 100644 --- a/arch/mn10300/include/asm/mmu_context.h +++ b/arch/mn10300/include/asm/mmu_context.h @@ -22,7 +22,7 @@ #ifndef _ASM_MMU_CONTEXT_H #define _ASM_MMU_CONTEXT_H -#include +#include #include #include #include diff --git a/arch/mn10300/include/asm/spinlock.h b/arch/mn10300/include/asm/spinlock.h index 93429154e898..1ae580f38933 100644 --- a/arch/mn10300/include/asm/spinlock.h +++ b/arch/mn10300/include/asm/spinlock.h @@ -11,7 +11,7 @@ #ifndef _ASM_SPINLOCK_H #define _ASM_SPINLOCK_H -#include +#include #include #include diff --git a/arch/mn10300/include/asm/system.h b/arch/mn10300/include/asm/system.h index 8ff3e5aaca41..94b4c5e1491b 100644 --- a/arch/mn10300/include/asm/system.h +++ b/arch/mn10300/include/asm/system.h @@ -19,7 +19,7 @@ #include #include -#include +#include #if !defined(CONFIG_LAZY_SAVE_FPU) struct fpu_state_struct; diff --git a/arch/mn10300/kernel/mn10300-watchdog.c b/arch/mn10300/kernel/mn10300-watchdog.c index c5e12bfd9fcd..a45f0c7549a6 100644 --- a/arch/mn10300/kernel/mn10300-watchdog.c +++ b/arch/mn10300/kernel/mn10300-watchdog.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mn10300/kernel/traps.c b/arch/mn10300/kernel/traps.c index bd3e5e73826e..9220a75a7b43 100644 --- a/arch/mn10300/kernel/traps.c +++ b/arch/mn10300/kernel/traps.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mn10300/mm/misalignment.c b/arch/mn10300/mm/misalignment.c index eef989c1d0c1..f9bb8cb1c14a 100644 --- a/arch/mn10300/mm/misalignment.c +++ b/arch/mn10300/mm/misalignment.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mn10300/proc-mn2ws0050/proc-init.c b/arch/mn10300/proc-mn2ws0050/proc-init.c index c58249b9525a..fe6e24906ffc 100644 --- a/arch/mn10300/proc-mn2ws0050/proc-init.c +++ b/arch/mn10300/proc-mn2ws0050/proc-init.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/parisc/include/asm/atomic.h b/arch/parisc/include/asm/atomic.h index f81955934aeb..192488999b63 100644 --- a/arch/parisc/include/asm/atomic.h +++ b/arch/parisc/include/asm/atomic.h @@ -220,7 +220,6 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) return c != (u); } -#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) #define atomic_add(i,v) 
((void)(__atomic_add_return( (i),(v)))) #define atomic_sub(i,v) ((void)(__atomic_add_return(-(i),(v)))) diff --git a/arch/parisc/include/asm/bitops.h b/arch/parisc/include/asm/bitops.h index 4e833aa05a44..8c9b631d2a78 100644 --- a/arch/parisc/include/asm/bitops.h +++ b/arch/parisc/include/asm/bitops.h @@ -8,7 +8,7 @@ #include #include /* for BITS_PER_LONG/SHIFT_PER_LONG */ #include -#include +#include /* * HP-PARISC specific bit operations diff --git a/arch/parisc/include/asm/mmu_context.h b/arch/parisc/include/asm/mmu_context.h index 354b2aca990e..59be25764433 100644 --- a/arch/parisc/include/asm/mmu_context.h +++ b/arch/parisc/include/asm/mmu_context.h @@ -3,7 +3,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c index df653663d3db..a7bb757a5497 100644 --- a/arch/parisc/kernel/parisc_ksyms.c +++ b/arch/parisc/kernel/parisc_ksyms.c @@ -31,7 +31,7 @@ #include EXPORT_SYMBOL(memset); -#include +#include EXPORT_SYMBOL(__xchg8); EXPORT_SYMBOL(__xchg32); EXPORT_SYMBOL(__cmpxchg_u32); diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index 828305f19cff..32d588488f04 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -33,7 +33,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 8b58bf0b7d5a..f19e6604026a 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/parisc/lib/bitops.c b/arch/parisc/lib/bitops.c index 353963d42059..a8bffd8af77d 100644 --- a/arch/parisc/lib/bitops.c +++ b/arch/parisc/lib/bitops.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #ifdef CONFIG_SMP arch_spinlock_t __atomic_hash[ATOMIC_HASH_SIZE] __lock_aligned = { diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index b8f152ece025..b2bcbee622ea 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -212,7 +212,6 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) return t != u; } -#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) #define atomic_sub_and_test(a, v) (atomic_sub_return((a), (v)) == 0) #define atomic_dec_and_test(v) (atomic_dec_return((v)) == 0) diff --git a/arch/powerpc/include/asm/emulated_ops.h b/arch/powerpc/include/asm/emulated_ops.h index 2cc41c715d2b..63f2a22e9954 100644 --- a/arch/powerpc/include/asm/emulated_ops.h +++ b/arch/powerpc/include/asm/emulated_ops.h @@ -18,7 +18,7 @@ #ifndef _ASM_POWERPC_EMULATED_OPS_H #define _ASM_POWERPC_EMULATED_OPS_H -#include +#include #include diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h index c57a28e52b64..c0e1bc319e35 100644 --- a/arch/powerpc/include/asm/irq.h +++ b/arch/powerpc/include/asm/irq.h @@ -14,7 +14,7 @@ #include #include -#include +#include /* Define a way to iterate across irqs. 
*/ diff --git a/arch/powerpc/include/asm/local.h b/arch/powerpc/include/asm/local.h index c2410af6bfd9..b8da91363864 100644 --- a/arch/powerpc/include/asm/local.h +++ b/arch/powerpc/include/asm/local.h @@ -2,7 +2,7 @@ #define _ARCH_POWERPC_LOCAL_H #include -#include +#include typedef struct { diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index b823536375dc..b5c91901e384 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -18,7 +18,7 @@ */ #include #include -#include +#include #define HAVE_ARCH_DEVTREE_FIXUPS diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index 24582181b6ec..59dbf6abaaf3 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #ifdef CONFIG_PPC_OF_PLATFORM_PCI diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index 7d28f540200c..f5ae872a2ef0 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 0e0ea941156f..d5ca8236315c 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 67f6c3b51357..481ef064c8f1 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include diff --git a/arch/powerpc/kernel/smp-tbsync.c b/arch/powerpc/kernel/smp-tbsync.c index 03e45c4a9ef1..640de836e466 100644 --- a/arch/powerpc/kernel/smp-tbsync.c +++ b/arch/powerpc/kernel/smp-tbsync.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index f932f8a0cf0c..7bf2187dfd99 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -33,7 +33,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/83xx/km83xx.c b/arch/powerpc/platforms/83xx/km83xx.c index f8fa2fc3129f..c55129f5760a 100644 --- a/arch/powerpc/platforms/83xx/km83xx.c +++ b/arch/powerpc/platforms/83xx/km83xx.c @@ -28,7 +28,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/83xx/mpc832x_mds.c b/arch/powerpc/platforms/83xx/mpc832x_mds.c index 93e60f1f21a9..32a52896822f 100644 --- a/arch/powerpc/platforms/83xx/mpc832x_mds.c +++ b/arch/powerpc/platforms/83xx/mpc832x_mds.c @@ -27,7 +27,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/83xx/mpc834x_itx.c b/arch/powerpc/platforms/83xx/mpc834x_itx.c index 81e44fa1c644..6b45969567d4 100644 --- a/arch/powerpc/platforms/83xx/mpc834x_itx.c +++ b/arch/powerpc/platforms/83xx/mpc834x_itx.c @@ -26,7 +26,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/83xx/mpc834x_mds.c b/arch/powerpc/platforms/83xx/mpc834x_mds.c index c1b1dc50b32a..041c5177e737 100644 --- a/arch/powerpc/platforms/83xx/mpc834x_mds.c +++ b/arch/powerpc/platforms/83xx/mpc834x_mds.c @@ -26,7 +26,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/83xx/mpc836x_mds.c 
b/arch/powerpc/platforms/83xx/mpc836x_mds.c index 81c052b1353e..934cc8c46bbc 100644 --- a/arch/powerpc/platforms/83xx/mpc836x_mds.c +++ b/arch/powerpc/platforms/83xx/mpc836x_mds.c @@ -34,7 +34,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/83xx/sbc834x.c b/arch/powerpc/platforms/83xx/sbc834x.c index 49023dbe1576..af41d8c810a8 100644 --- a/arch/powerpc/platforms/83xx/sbc834x.c +++ b/arch/powerpc/platforms/83xx/sbc834x.c @@ -28,7 +28,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c index 6299a2a51ae8..2bf99786d249 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_cds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_cds.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c index 747d1ee661fd..973b3f4a4b49 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c @@ -36,7 +36,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/85xx/sbc8548.c b/arch/powerpc/platforms/85xx/sbc8548.c index ecdd8c09e4ed..d07dcb7f4ee9 100644 --- a/arch/powerpc/platforms/85xx/sbc8548.c +++ b/arch/powerpc/platforms/85xx/sbc8548.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/cell/cpufreq_spudemand.c b/arch/powerpc/platforms/cell/cpufreq_spudemand.c index d809836bcf5f..7f92096fe968 100644 --- a/arch/powerpc/platforms/cell/cpufreq_spudemand.c +++ b/arch/powerpc/platforms/cell/cpufreq_spudemand.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c index dbb641ea90dd..f2e1dfe4bf31 100644 --- a/arch/powerpc/platforms/cell/smp.c +++ b/arch/powerpc/platforms/cell/smp.c @@ -28,7 +28,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c index 0c87bcd2452a..bf4d41d8fa14 100644 --- a/arch/powerpc/platforms/cell/spufs/context.c +++ b/arch/powerpc/platforms/cell/spufs/context.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include "spufs.h" diff --git a/arch/powerpc/platforms/chrp/smp.c b/arch/powerpc/platforms/chrp/smp.c index a800122e4dda..feab30bbae23 100644 --- a/arch/powerpc/platforms/chrp/smp.c +++ b/arch/powerpc/platforms/chrp/smp.c @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/iseries/smp.c b/arch/powerpc/platforms/iseries/smp.c index 2df48c2287bd..8bda9be06fa0 100644 --- a/arch/powerpc/platforms/iseries/smp.c +++ b/arch/powerpc/platforms/iseries/smp.c @@ -29,7 +29,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/powermac/backlight.c b/arch/powerpc/platforms/powermac/backlight.c index d679964ae2ab..c2f3e861f5ea 100644 --- a/arch/powerpc/platforms/powermac/backlight.c +++ b/arch/powerpc/platforms/powermac/backlight.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index d15fca322978..9a521dc8e485 100644 --- 
a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -35,7 +35,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 46b55cf563e3..ada6e07532ec 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c @@ -31,7 +31,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/pseries/eeh_cache.c b/arch/powerpc/platforms/pseries/eeh_cache.c index 8ed0d2d0e1b5..fc5ae767989e 100644 --- a/arch/powerpc/platforms/pseries/eeh_cache.c +++ b/arch/powerpc/platforms/pseries/eeh_cache.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index 1672db2d1b0e..4e44c4dcd11c 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -27,7 +27,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c index 265313e8396b..2d66275e489f 100644 --- a/arch/powerpc/sysdev/fsl_soc.c +++ b/arch/powerpc/sysdev/fsl_soc.c @@ -32,7 +32,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/sysdev/tsi108_dev.c b/arch/powerpc/sysdev/tsi108_dev.c index ee056807b52c..9f51f97abb5d 100644 --- a/arch/powerpc/sysdev/tsi108_dev.c +++ b/arch/powerpc/sysdev/tsi108_dev.c @@ -23,7 +23,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index d9db13810d15..29d756329228 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -108,7 +108,6 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) return c != u; } -#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) #undef __CS_LOOP diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 1ca3d1d6a86c..45df6d456aa1 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index e9372c77cced..ffabcd9d3363 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sh/include/asm/atomic.h b/arch/sh/include/asm/atomic.h index c7983124d99d..8ddb2635cf92 100644 --- a/arch/sh/include/asm/atomic.h +++ b/arch/sh/include/asm/atomic.h @@ -30,7 +30,6 @@ #define atomic_inc_and_test(v) (atomic_inc_return(v) == 0) #define atomic_sub_and_test(i,v) (atomic_sub_return((i), (v)) == 0) #define atomic_dec_and_test(v) (atomic_sub_return(1, (v)) == 0) -#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) #define atomic_inc(v) atomic_add(1, (v)) #define atomic_dec(v) atomic_sub(1, (v)) diff --git a/arch/sh/include/asm/hw_irq.h b/arch/sh/include/asm/hw_irq.h index 603cdde813d1..693d44184058 100644 --- a/arch/sh/include/asm/hw_irq.h +++ b/arch/sh/include/asm/hw_irq.h @@ -3,7 +3,7 @@ #include #include -#include +#include extern atomic_t irq_err_count; diff --git a/arch/sh/include/asm/smp.h b/arch/sh/include/asm/smp.h index 9070d943ddde..78b0d0f4b24b 100644 --- a/arch/sh/include/asm/smp.h +++ b/arch/sh/include/asm/smp.h @@ -8,7 +8,7 @@ #ifdef CONFIG_SMP #include -#include +#include 
#include #include diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c index 425d604e3a28..84db0d6ccd0d 100644 --- a/arch/sh/kernel/idle.c +++ b/arch/sh/kernel/idle.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include void (*pm_idle)(void) = NULL; diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c index 6207561ea34a..3147a9a6fb8b 100644 --- a/arch/sh/kernel/smp.c +++ b/arch/sh/kernel/smp.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sh/kernel/traps_64.c b/arch/sh/kernel/traps_64.c index 67110be83fd7..cd3a40483299 100644 --- a/arch/sh/kernel/traps_64.c +++ b/arch/sh/kernel/traps_64.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sh/kernel/unwinder.c b/arch/sh/kernel/unwinder.c index 468889d958f4..521b5432471f 100644 --- a/arch/sh/kernel/unwinder.c +++ b/arch/sh/kernel/unwinder.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include /* * This is the most basic stack unwinder an architecture can diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h index 7ae128b19d3f..7646f2cef5d0 100644 --- a/arch/sparc/include/asm/atomic_32.h +++ b/arch/sparc/include/asm/atomic_32.h @@ -52,7 +52,6 @@ extern void atomic_set(atomic_t *, int); #define atomic_dec_and_test(v) (atomic_dec_return(v) == 0) #define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0) -#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) /* This is the old 24-bit implementation. It's still used internally * by some sparc-specific code, notably the semaphore implementation. diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h index bdb2ff880bdd..337139ef91be 100644 --- a/arch/sparc/include/asm/atomic_64.h +++ b/arch/sparc/include/asm/atomic_64.h @@ -85,7 +85,6 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) return c != (u); } -#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) #define atomic64_cmpxchg(v, o, n) \ ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) diff --git a/arch/sparc/include/asm/prom.h b/arch/sparc/include/asm/prom.h index 56bbaadef646..edd3d3cde460 100644 --- a/arch/sparc/include/asm/prom.h +++ b/arch/sparc/include/asm/prom.h @@ -21,7 +21,7 @@ #include #include #include -#include +#include #define OF_ROOT_NODE_ADDR_CELLS_DEFAULT 2 #define OF_ROOT_NODE_SIZE_CELLS_DEFAULT 1 diff --git a/arch/sparc/include/asm/smp_32.h b/arch/sparc/include/asm/smp_32.h index 093f10843ff2..01c51c704341 100644 --- a/arch/sparc/include/asm/smp_32.h +++ b/arch/sparc/include/asm/smp_32.h @@ -22,7 +22,7 @@ #include #include -#include +#include /* * Private routines/data diff --git a/arch/sparc/include/asm/smp_64.h b/arch/sparc/include/asm/smp_64.h index 20bca8950710..29862a9e9065 100644 --- a/arch/sparc/include/asm/smp_64.h +++ b/arch/sparc/include/asm/smp_64.h @@ -27,7 +27,7 @@ */ #include -#include +#include #include DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index 4e78862d12fd..0dd8422a469c 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -26,7 +26,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c index fe8fb44c609c..1210fde18740 100644 --- a/arch/sparc/kernel/leon_smp.c +++ b/arch/sparc/kernel/leon_smp.c @@ -28,7 +28,7 @@ #include #include -#include +#include 
#include #include diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 62a034318b18..171e8d84dc3f 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c index 21b125341bf7..f671e7fd6ddc 100644 --- a/arch/sparc/kernel/smp_32.c +++ b/arch/sparc/kernel/smp_32.c @@ -22,7 +22,7 @@ #include #include -#include +#include #include #include diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 99cb17251bb5..4a442c32e117 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -28,7 +28,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c index d3c7a12ad879..1a371f8ae0b0 100644 --- a/arch/sparc/lib/atomic32.c +++ b/arch/sparc/lib/atomic32.c @@ -7,7 +7,7 @@ * Based on asm-parisc/atomic.h Copyright (C) 2000 Philipp Rumpf */ -#include +#include #include #include diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h index 739cfe0499d1..e3272715c3cb 100644 --- a/arch/tile/include/asm/atomic.h +++ b/arch/tile/include/asm/atomic.h @@ -121,15 +121,6 @@ static inline int atomic_read(const atomic_t *v) */ #define atomic_add_negative(i, v) (atomic_add_return((i), (v)) < 0) -/** - * atomic_inc_not_zero - increment unless the number is zero - * @v: pointer of type atomic_t - * - * Atomically increments @v by 1, so long as @v is non-zero. - * Returns non-zero if @v was non-zero, and zero otherwise. - */ -#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) - /* Nonexistent functions intended to cause link errors. */ extern unsigned long __xchg_called_with_bad_pointer(void); extern unsigned long __cmpxchg_called_with_bad_pointer(void); diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h index 92a8bee32311..246feed4794d 100644 --- a/arch/tile/include/asm/atomic_32.h +++ b/arch/tile/include/asm/atomic_32.h @@ -11,7 +11,7 @@ * NON INFRINGEMENT. See the GNU General Public License for * more details. * - * Do not include directly; use . + * Do not include directly; use . */ #ifndef _ASM_TILE_ATOMIC_32_H @@ -21,7 +21,7 @@ #ifndef __ASSEMBLY__ -/* Tile-specific routines to support . */ +/* Tile-specific routines to support . */ int _atomic_xchg(atomic_t *v, int n); int _atomic_xchg_add(atomic_t *v, int i); int _atomic_xchg_add_unless(atomic_t *v, int a, int u); diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h index 1c1e60d8ccb6..a48dda30cbcc 100644 --- a/arch/tile/include/asm/atomic_64.h +++ b/arch/tile/include/asm/atomic_64.h @@ -11,7 +11,7 @@ * NON INFRINGEMENT. See the GNU General Public License for * more details. * - * Do not include directly; use . + * Do not include directly; use . */ #ifndef _ASM_TILE_ATOMIC_64_H diff --git a/arch/tile/include/asm/bitops_32.h b/arch/tile/include/asm/bitops_32.h index d31ab905cfa7..571b118bfd9b 100644 --- a/arch/tile/include/asm/bitops_32.h +++ b/arch/tile/include/asm/bitops_32.h @@ -16,7 +16,7 @@ #define _ASM_TILE_BITOPS_32_H #include -#include +#include #include /* Tile-specific routines to support . 
*/ diff --git a/arch/tile/include/asm/bitops_64.h b/arch/tile/include/asm/bitops_64.h index 68f8c5bc0679..e9c8e381ee0e 100644 --- a/arch/tile/include/asm/bitops_64.h +++ b/arch/tile/include/asm/bitops_64.h @@ -16,7 +16,7 @@ #define _ASM_TILE_BITOPS_64_H #include -#include +#include #include /* See for API comments. */ diff --git a/arch/tile/include/asm/spinlock_32.h b/arch/tile/include/asm/spinlock_32.h index a8f2c6e31a87..a5e4208d34f9 100644 --- a/arch/tile/include/asm/spinlock_32.h +++ b/arch/tile/include/asm/spinlock_32.h @@ -17,7 +17,7 @@ #ifndef _ASM_TILE_SPINLOCK_32_H #define _ASM_TILE_SPINLOCK_32_H -#include +#include #include #include #include diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index 72ade79b621b..fc94607f0bd5 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c index 46570211df52..771b251b409d 100644 --- a/arch/tile/lib/atomic_32.c +++ b/arch/tile/lib/atomic_32.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S index 24448734f6f1..1f75a2a56101 100644 --- a/arch/tile/lib/atomic_asm_32.S +++ b/arch/tile/lib/atomic_asm_32.S @@ -70,7 +70,7 @@ */ #include -#include +#include #include #include diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 5852519b2d0f..f6f5c53dc903 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -43,7 +43,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 4a0b7c7e2cce..7b3ca8324b69 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 952a826ac4e5..897969bdd4e6 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -244,7 +244,6 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) return c != (u); } -#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) /* * atomic_dec_if_positive - decrement by 1 if old value positive diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 13f5504c76c0..09199052060f 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h index 2e9972468a5d..9cdae5d47e8f 100644 --- a/arch/x86/include/asm/local.h +++ b/arch/x86/include/asm/local.h @@ -4,7 +4,7 @@ #include #include -#include +#include #include typedef struct { diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 716b48af7863..c9321f34e55b 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -124,7 +124,7 @@ extern struct atomic_notifier_head x86_mce_decoder_chain; #include #include -#include +#include extern int mce_disabled; extern int mce_p5_enabled; diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 8b5393ec1080..69021528b43c 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -2,7 +2,7 @@ #define _ASM_X86_MMU_CONTEXT_H #include -#include +#include #include #include 
#include diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h index df1287019e6d..644dd885f05a 100644 --- a/arch/x86/include/asm/prom.h +++ b/arch/x86/include/asm/prom.h @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index e9e51f710e6c..ee67edf86fdd 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -1,7 +1,7 @@ #ifndef _ASM_X86_SPINLOCK_H #define _ASM_X86_SPINLOCK_H -#include +#include #include #include #include diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 1f2e61e28981..a1fe5c127b52 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -21,7 +21,7 @@ struct task_struct; struct exec_domain; #include #include -#include +#include struct thread_info { struct task_struct *task; /* main task structure */ diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index b117efd24f71..8a439d364b94 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index b24be38c8cf8..52fa56399a50 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 9536b3fe43f8..5d513bc47b6b 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -48,7 +48,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 22a073d7fbff..62184390a601 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 65b8f5c2eebf..610485223bdb 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index f09d4bbe2d2d..b3300e6bacef 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -15,7 +15,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index fbc097a085ca..9682ec50180c 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -49,7 +49,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 2b2255b1f04b..57dcbd4308fa 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include "kvm_cache_regs.h" #include "irq.h" #include "trace.h" diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c index abd86e865be3..ae432ea1cd83 100644 --- a/arch/x86/kvm/timer.c +++ b/arch/x86/kvm/timer.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include "kvm_timer.h" static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer) diff --git a/arch/x86/lib/atomic64_32.c b/arch/x86/lib/atomic64_32.c index 540179e8e9fa..042f6826bf57 100644 --- a/arch/x86/lib/atomic64_32.c +++ 
b/arch/x86/lib/atomic64_32.c @@ -4,7 +4,7 @@ #include #include -#include +#include long long atomic64_read_cx8(long long, const atomic64_t *v); EXPORT_SYMBOL(atomic64_read_cx8); diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index 3adff7dcc148..67421f38a215 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -34,7 +34,7 @@ #include #include #include /* for ISA_START_ADDRESS */ -#include +#include #include #include diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h index a96a0619d0b7..7cca2fb18baf 100644 --- a/arch/xtensa/include/asm/atomic.h +++ b/arch/xtensa/include/asm/atomic.h @@ -248,7 +248,6 @@ static __inline__ int atomic_add_unless(atomic_t *v, int a, int u) return c != (u); } -#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) static inline void atomic_clear_mask(unsigned int mask, atomic_t *v) { diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index e3558b9a58ba..47041e7c088c 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -40,7 +40,7 @@ #include #include #include -#include +#include #include #include diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 940d70cb5c25..ac33d5f30778 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -12,7 +12,7 @@ * */ -#include +#include #include #include #include diff --git a/crypto/proc.c b/crypto/proc.c index 58fef67d4f4d..3808697814d7 100644 --- a/crypto/proc.c +++ b/crypto/proc.c @@ -13,7 +13,7 @@ * */ -#include +#include #include #include #include diff --git a/crypto/rng.c b/crypto/rng.c index f93cb5311182..45229ae782be 100644 --- a/crypto/rng.c +++ b/crypto/rng.c @@ -12,7 +12,7 @@ * */ -#include +#include #include #include #include diff --git a/drivers/atm/ambassador.c b/drivers/atm/ambassador.c index bb3b016b6ce8..f8f41e0e8a8c 100644 --- a/drivers/atm/ambassador.c +++ b/drivers/atm/ambassador.c @@ -38,7 +38,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c index 0b0625054a87..b22d71cac54c 100644 --- a/drivers/atm/atmtcp.c +++ b/drivers/atm/atmtcp.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include extern int atm_init_aal5(struct atm_vcc *vcc); /* "raw" AAL5 transport */ diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index 3230ea0df83c..93071417315f 100644 --- a/drivers/atm/eni.c +++ b/drivers/atm/eni.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/atm/eni.h b/drivers/atm/eni.h index 493a6932507e..dc9a62cc2605 100644 --- a/drivers/atm/eni.h +++ b/drivers/atm/eni.h @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include "midway.h" diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c index 7c7b571647f9..5072f8ac16fd 100644 --- a/drivers/atm/firestream.c +++ b/drivers/atm/firestream.c @@ -52,7 +52,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c index bc9e702186dd..361f5aee3be1 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -44,7 +44,7 @@ #include #include #include -#include +#include #ifdef CONFIG_SBUS #include diff --git a/drivers/atm/horizon.c b/drivers/atm/horizon.c index 287506183893..b81210330aca 100644 --- a/drivers/atm/horizon.c +++ b/drivers/atm/horizon.c @@ -45,7 +45,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index be0dbfeb541c..db06f34419cf 
100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -46,7 +46,7 @@ #include #include -#include +#include #include #ifdef CONFIG_ATM_IDT77252_USE_SUNI diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c index 957106f636ea..cb90f7a3e074 100644 --- a/drivers/atm/iphase.c +++ b/drivers/atm/iphase.c @@ -58,7 +58,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index 6b313ee9231b..1c70c45fa044 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -51,7 +51,7 @@ #include #include #include -#include +#include #include "nicstar.h" #ifdef CONFIG_ATM_NICSTAR_USE_SUNI #include "suni.h" diff --git a/drivers/atm/suni.c b/drivers/atm/suni.c index 41c56eae4c81..90f1ccca9e52 100644 --- a/drivers/atm/suni.c +++ b/drivers/atm/suni.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include "suni.h" diff --git a/drivers/atm/uPD98402.c b/drivers/atm/uPD98402.c index c45ae0573bbd..5120a96b3a89 100644 --- a/drivers/atm/uPD98402.c +++ b/drivers/atm/uPD98402.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include "uPD98402.h" diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c index 7f8c5132ff32..d889f56e8d8c 100644 --- a/drivers/atm/zatm.c +++ b/drivers/atm/zatm.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include "uPD98401.h" diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 45d7c8fc73bd..2840ed4668c1 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -24,7 +24,7 @@ #include #include -#include +#include #include static DEFINE_MUTEX(mem_sysfs_mutex); diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index 942d6a7c9ae1..17b7934f31cb 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include "power.h" diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c index 696100241a6f..951a4e33b92b 100644 --- a/drivers/block/cciss_scsi.c +++ b/drivers/block/cciss_scsi.c @@ -33,7 +33,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c index 320668f4c3aa..3302586655c4 100644 --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c @@ -52,7 +52,7 @@ #include #include #include -#include +#include #ifdef CONFIG_X86 /* diff --git a/drivers/char/mspec.c b/drivers/char/mspec.c index 25d139c9dbed..5c0d96a820fa 100644 --- a/drivers/char/mspec.c +++ b/drivers/char/mspec.c @@ -46,7 +46,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c index 0debc17c8e28..3ee1fdb31ea7 100644 --- a/drivers/connector/cn_proc.c +++ b/drivers/connector/cn_proc.c @@ -29,7 +29,8 @@ #include #include #include -#include +#include + #include #include diff --git a/drivers/edac/edac_stub.c b/drivers/edac/edac_stub.c index aab970760b75..86ad2eee1201 100644 --- a/drivers/edac/edac_stub.c +++ b/drivers/edac/edac_stub.c @@ -14,7 +14,7 @@ */ #include #include -#include +#include #include int edac_op_state = EDAC_OPSTATE_INVAL; diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index 29d2423fae6d..85661b060ed7 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -32,7 +32,7 @@ #include #include -#include +#include #include #include "core.h" diff --git 
a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index 95a471401892..8ba7f7928f1f 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -38,7 +38,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/firewire/core-topology.c b/drivers/firewire/core-topology.c index 193ed9233144..94d3b494ddfb 100644 --- a/drivers/firewire/core-topology.c +++ b/drivers/firewire/core-topology.c @@ -29,7 +29,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/firewire/core.h b/drivers/firewire/core.h index 0fe4e4e6eda7..b45be5767529 100644 --- a/drivers/firewire/core.h +++ b/drivers/firewire/core.h @@ -9,7 +9,7 @@ #include #include -#include +#include struct device; struct fw_card; diff --git a/drivers/firewire/nosy.c b/drivers/firewire/nosy.c index 0618145376ad..763626b739d1 100644 --- a/drivers/firewire/nosy.c +++ b/drivers/firewire/nosy.c @@ -37,7 +37,7 @@ #include #include -#include +#include #include #include "nosy.h" diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index ef37a9b5a3cc..32807baf55e2 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -60,7 +60,7 @@ * are considered as fatal) */ -#include +#include #include #include #include diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 021d2b6b556f..7fd4e3e5ad5f 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -29,7 +29,7 @@ * Dave Airlie */ #include -#include +#include #include #include #include diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 2e618b5ac465..56619f64b6bf 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #define TTM_ASSERT_LOCKED(param) #define TTM_DEBUG(fmt, arg...) 
diff --git a/drivers/gpu/drm/ttm/ttm_lock.c b/drivers/gpu/drm/ttm/ttm_lock.c index de41e55a944a..075daf44bce4 100644 --- a/drivers/gpu/drm/ttm/ttm_lock.c +++ b/drivers/gpu/drm/ttm/ttm_lock.c @@ -30,7 +30,7 @@ #include "ttm/ttm_lock.h" #include "ttm/ttm_module.h" -#include +#include #include #include #include diff --git a/drivers/gpu/drm/ttm/ttm_object.c b/drivers/gpu/drm/ttm/ttm_object.c index ebddd443d91a..93577f2e2954 100644 --- a/drivers/gpu/drm/ttm/ttm_object.c +++ b/drivers/gpu/drm/ttm/ttm_object.c @@ -55,7 +55,7 @@ #include #include #include -#include +#include struct ttm_object_file { struct ttm_object_device *tdev; diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index 170e751c283e..727e93daac3b 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -40,7 +40,7 @@ #include #include -#include +#include #include "ttm/ttm_bo_driver.h" #include "ttm/ttm_page_alloc.h" diff --git a/drivers/hwmon/sht15.c b/drivers/hwmon/sht15.c index 7d231cf5d2ce..fe4104c6b764 100644 --- a/drivers/hwmon/sht15.c +++ b/drivers/hwmon/sht15.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include /* Commands */ #define SHT15_MEASURE_TEMP 0x03 diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 0347eed4a167..40c835309e49 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -31,7 +31,7 @@ */ #include -#include +#include #include "iw_cxgb4.h" diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h index f09914cccf53..54c0d23bad92 100644 --- a/drivers/infiniband/hw/ehca/ehca_tools.h +++ b/drivers/infiniband/hw/ehca/ehca_tools.h @@ -58,7 +58,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 73bc18465c9c..c118663e4437 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -34,7 +34,7 @@ #define TCPOPT_TIMESTAMP 8 -#include +#include #include #include #include diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 7b6985a2e652..b3cc1e062b17 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -45,7 +45,7 @@ #include -#include +#include #include #include diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 7d5109bbd1ad..0bfa545675b8 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -39,7 +39,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/isdn/gigaset/gigaset.h b/drivers/isdn/gigaset/gigaset.h index 6dd360734cfd..212efaf9a4e4 100644 --- a/drivers/isdn/gigaset/gigaset.h +++ b/drivers/isdn/gigaset/gigaset.h @@ -34,7 +34,7 @@ #include #include #include -#include +#include #define GIG_VERSION {0, 5, 0, 0} #define GIG_COMPAT {0, 4, 0, 0} diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index c8827ffd85bb..bae6c4e23d3f 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 819e37eaaeba..320401dec104 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -10,7 +10,7 @@ */ #include -#include +#include #include #include #include diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 
aa4e570c2cb5..c3547016f0f1 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #define DM_MSG_PREFIX "multipath" #define MESG_STR(x) x, sizeof(x) diff --git a/drivers/md/dm-queue-length.c b/drivers/md/dm-queue-length.c index f92b6cea9d9c..03a837aa5ce6 100644 --- a/drivers/md/dm-queue-length.c +++ b/drivers/md/dm-queue-length.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #define DM_MSG_PREFIX "multipath queue-length" #define QL_MIN_IO 128 diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 451c3bb176d2..bfe9c2333cea 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #define DM_MSG_PREFIX "table" diff --git a/drivers/media/video/hdpvr/hdpvr-core.c b/drivers/media/video/hdpvr/hdpvr-core.c index a27d93b503a5..5f1db46beb4e 100644 --- a/drivers/media/video/hdpvr/hdpvr-core.c +++ b/drivers/media/video/hdpvr/hdpvr-core.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/media/video/tlg2300/pd-dvb.c b/drivers/media/video/tlg2300/pd-dvb.c index edd78f8b1baa..d0da11ae19df 100644 --- a/drivers/media/video/tlg2300/pd-dvb.c +++ b/drivers/media/video/tlg2300/pd-dvb.c @@ -7,7 +7,7 @@ #include "vendorcmds.h" #include -#include +#include static void dvb_urb_cleanup(struct pd_dvb_adapter *pd_dvb); diff --git a/drivers/media/video/uvc/uvc_ctrl.c b/drivers/media/video/uvc/uvc_ctrl.c index a4db26fa2f53..2c8954ec6859 100644 --- a/drivers/media/video/uvc/uvc_ctrl.c +++ b/drivers/media/video/uvc/uvc_ctrl.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include "uvcvideo.h" diff --git a/drivers/media/video/uvc/uvc_queue.c b/drivers/media/video/uvc/uvc_queue.c index f90ce9fce539..677691c44500 100644 --- a/drivers/media/video/uvc/uvc_queue.c +++ b/drivers/media/video/uvc/uvc_queue.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include "uvcvideo.h" diff --git a/drivers/media/video/uvc/uvc_v4l2.c b/drivers/media/video/uvc/uvc_v4l2.c index 543a80395b7f..dde6533e8e6d 100644 --- a/drivers/media/video/uvc/uvc_v4l2.c +++ b/drivers/media/video/uvc/uvc_v4l2.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/media/video/uvc/uvc_video.c b/drivers/media/video/uvc/uvc_video.c index 49994793cc77..8244167c8915 100644 --- a/drivers/media/video/uvc/uvc_video.c +++ b/drivers/media/video/uvc/uvc_video.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/message/i2o/i2o_scsi.c b/drivers/message/i2o/i2o_scsi.c index 74fbe56321ff..c8ed7b63fdf5 100644 --- a/drivers/message/i2o/i2o_scsi.c +++ b/drivers/message/i2o/i2o_scsi.c @@ -59,7 +59,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/misc/phantom.c b/drivers/misc/phantom.c index b05db55c8c8e..21b28fc6d912 100644 --- a/drivers/misc/phantom.c +++ b/drivers/misc/phantom.c @@ -26,7 +26,7 @@ #include #include -#include +#include #include #define PHANTOM_VERSION "n0.9.8" diff --git a/drivers/net/atlx/atl1.c b/drivers/net/atlx/atl1.c index 6f0e9403004b..97e6954304ea 100644 --- a/drivers/net/atlx/atl1.c +++ b/drivers/net/atlx/atl1.c @@ -44,7 +44,7 @@ * SMP torture testing */ -#include +#include #include #include diff --git a/drivers/net/atlx/atl2.c b/drivers/net/atlx/atl2.c index e0f87cf1e2ba..d4f7dda39721 100644 --- a/drivers/net/atlx/atl2.c +++ 
b/drivers/net/atlx/atl2.c @@ -20,7 +20,7 @@ * Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#include +#include #include #include #include diff --git a/drivers/net/atlx/atl2.h b/drivers/net/atlx/atl2.h index 78344ddf4bf0..bf9016ebdd9b 100644 --- a/drivers/net/atlx/atl2.h +++ b/drivers/net/atlx/atl2.h @@ -25,7 +25,7 @@ #ifndef _ATL2_H_ #define _ATL2_H_ -#include +#include #include #ifndef _ATL2_HW_H_ diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c index b414f5ae0da5..646c86bcc545 100644 --- a/drivers/net/cassini.c +++ b/drivers/net/cassini.c @@ -98,7 +98,7 @@ #include -#include +#include #include #include #include diff --git a/drivers/net/cpmac.c b/drivers/net/cpmac.c index 086ce0418b29..e0638cb4b07c 100644 --- a/drivers/net/cpmac.c +++ b/drivers/net/cpmac.c @@ -40,7 +40,7 @@ #include #include #include -#include +#include MODULE_AUTHOR("Eugene Konev "); MODULE_DESCRIPTION("TI AR7 ethernet driver (CPMAC)"); diff --git a/drivers/net/cxgb3/cxgb3_offload.c b/drivers/net/cxgb3/cxgb3_offload.c index 32636a1d62a5..805076c54f1b 100644 --- a/drivers/net/cxgb3/cxgb3_offload.c +++ b/drivers/net/cxgb3/cxgb3_offload.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/cxgb3/l2t.h b/drivers/net/cxgb3/l2t.h index fd3eb07e3f40..7a12d52ed4fc 100644 --- a/drivers/net/cxgb3/l2t.h +++ b/drivers/net/cxgb3/l2t.h @@ -34,7 +34,7 @@ #include #include "t3cdev.h" -#include +#include enum { L2T_STATE_VALID, /* entry is up to date */ diff --git a/drivers/net/cxgb3/t3cdev.h b/drivers/net/cxgb3/t3cdev.h index be55e9ae74d1..705713b56636 100644 --- a/drivers/net/cxgb3/t3cdev.h +++ b/drivers/net/cxgb3/t3cdev.h @@ -33,7 +33,7 @@ #define _T3CDEV_H_ #include -#include +#include #include #include #include diff --git a/drivers/net/cxgb4/cxgb4_uld.h b/drivers/net/cxgb4/cxgb4_uld.h index 1b48c0170145..b1d39b8d141a 100644 --- a/drivers/net/cxgb4/cxgb4_uld.h +++ b/drivers/net/cxgb4/cxgb4_uld.h @@ -38,7 +38,7 @@ #include #include #include -#include +#include /* CPL message priority levels */ enum { diff --git a/drivers/net/cxgb4/l2t.h b/drivers/net/cxgb4/l2t.h index 7bd8f42378ff..02b31d0c6410 100644 --- a/drivers/net/cxgb4/l2t.h +++ b/drivers/net/cxgb4/l2t.h @@ -37,7 +37,7 @@ #include #include -#include +#include struct adapter; struct l2t_data; diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 0d283781bc5e..2a5a34d2d67b 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -36,7 +36,7 @@ #include #include #include -#include +#include #define SIXPACK_VERSION "Revision: 0.3.0" diff --git a/drivers/net/hamradio/dmascc.c b/drivers/net/hamradio/dmascc.c index 52b14256e2c0..ce555d9ac02c 100644 --- a/drivers/net/hamradio/dmascc.c +++ b/drivers/net/hamradio/dmascc.c @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c index 838c5b673767..ba99af05bf62 100644 --- a/drivers/net/ibmveth.c +++ b/drivers/net/ibmveth.c @@ -43,7 +43,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index a47595760751..3cbda0851f83 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -33,7 +33,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c index 4609bc0e2f56..10e5d985afa3 100644 --- a/drivers/net/ppp_generic.c +++ b/drivers/net/ppp_generic.c 
@@ -48,7 +48,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/net/wimax/i2400m/i2400m.h b/drivers/net/wimax/i2400m/i2400m.h index 5eacc653a94d..c421a6141854 100644 --- a/drivers/net/wimax/i2400m/i2400m.h +++ b/drivers/net/wimax/i2400m/i2400m.h @@ -155,7 +155,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/wireless/b43legacy/b43legacy.h b/drivers/net/wireless/b43legacy/b43legacy.h index 17a130d18dc9..a610a352102a 100644 --- a/drivers/net/wireless/b43legacy/b43legacy.h +++ b/drivers/net/wireless/b43legacy/b43legacy.h @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/net/wireless/b43legacy/dma.h b/drivers/net/wireless/b43legacy/dma.h index f89c34226288..686941c242fc 100644 --- a/drivers/net/wireless/b43legacy/dma.h +++ b/drivers/net/wireless/b43legacy/dma.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include "b43legacy.h" diff --git a/drivers/oprofile/oprofile_stats.h b/drivers/oprofile/oprofile_stats.h index 0b54e46c3c14..38b6fc028984 100644 --- a/drivers/oprofile/oprofile_stats.h +++ b/drivers/oprofile/oprofile_stats.h @@ -10,7 +10,7 @@ #ifndef OPROFILE_STATS_H #define OPROFILE_STATS_H -#include +#include struct oprofile_stat_struct { atomic_t sample_lost_no_mm; diff --git a/drivers/pci/hotplug/cpci_hotplug_core.c b/drivers/pci/hotplug/cpci_hotplug_core.c index d703e73fffa7..3fadf2f135e8 100644 --- a/drivers/pci/hotplug/cpci_hotplug_core.c +++ b/drivers/pci/hotplug/cpci_hotplug_core.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include "cpci_hotplug.h" diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c index 492b7d807fe8..6fa215a38615 100644 --- a/drivers/pci/xen-pcifront.c +++ b/drivers/pci/xen-pcifront.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/s390/block/dasd_eer.c b/drivers/s390/block/dasd_eer.c index 77f778b7b070..16c5208c3dc7 100644 --- a/drivers/s390/block/dasd_eer.c +++ b/drivers/s390/block/dasd_eer.c @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include "dasd_int.h" diff --git a/drivers/s390/char/sclp_quiesce.c b/drivers/s390/char/sclp_quiesce.c index 05909a7df8b3..a90a02c28d6a 100644 --- a/drivers/s390/char/sclp_quiesce.c +++ b/drivers/s390/char/sclp_quiesce.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c index c837d7419a6a..524d988d89dd 100644 --- a/drivers/s390/char/vmlogrdr.c +++ b/drivers/s390/char/vmlogrdr.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/s390/cio/device.h b/drivers/s390/cio/device.h index 7e297c7bb5ff..0b7245c72d5e 100644 --- a/drivers/s390/cio/device.h +++ b/drivers/s390/cio/device.h @@ -2,7 +2,7 @@ #define S390_DEVICE_H #include -#include +#include #include #include #include "io_sch.h" diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 570d4da10696..e58169c32474 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/s390/cio/qdio_thinint.c b/drivers/s390/cio/qdio_thinint.c index 68be6e157126..2a1d4dfaf859 100644 --- a/drivers/s390/cio/qdio_thinint.c +++ b/drivers/s390/cio/qdio_thinint.c @@ -9,7 +9,7 @@ #include 
#include #include -#include +#include #include #include #include diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index f8134a44cefa..b77ae519d79c 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -41,7 +41,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 8e65447f76b7..88ad33ed5d38 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/s390/crypto/zcrypt_cex2a.c b/drivers/s390/crypto/zcrypt_cex2a.c index 2176d00b395e..da171b5f3996 100644 --- a/drivers/s390/crypto/zcrypt_cex2a.c +++ b/drivers/s390/crypto/zcrypt_cex2a.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include "ap_bus.h" diff --git a/drivers/s390/crypto/zcrypt_mono.c b/drivers/s390/crypto/zcrypt_mono.c index 44253fdd4136..eb313c3fb2d1 100644 --- a/drivers/s390/crypto/zcrypt_mono.c +++ b/drivers/s390/crypto/zcrypt_mono.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include "ap_bus.h" diff --git a/drivers/s390/crypto/zcrypt_pcica.c b/drivers/s390/crypto/zcrypt_pcica.c index 1afb69c75fea..d84816f144df 100644 --- a/drivers/s390/crypto/zcrypt_pcica.c +++ b/drivers/s390/crypto/zcrypt_pcica.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include "ap_bus.h" diff --git a/drivers/s390/crypto/zcrypt_pcicc.c b/drivers/s390/crypto/zcrypt_pcicc.c index aa4c050a5694..bdbdbe192993 100644 --- a/drivers/s390/crypto/zcrypt_pcicc.c +++ b/drivers/s390/crypto/zcrypt_pcicc.c @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include "ap_bus.h" diff --git a/drivers/s390/crypto/zcrypt_pcixcc.c b/drivers/s390/crypto/zcrypt_pcixcc.c index 4f85eb725f4f..dd4737808e06 100644 --- a/drivers/s390/crypto/zcrypt_pcixcc.c +++ b/drivers/s390/crypto/zcrypt_pcixcc.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include "ap_bus.h" diff --git a/drivers/s390/net/fsm.h b/drivers/s390/net/fsm.h index 1e8b235d95b5..a4510cf59034 100644 --- a/drivers/s390/net/fsm.h +++ b/drivers/s390/net/fsm.h @@ -8,7 +8,7 @@ #include #include #include -#include +#include /** * Define this to get debugging messages. 
diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index 2a4991d6d4d5..7cac873c7383 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include "zfcp_ext.h" #include "zfcp_dbf.h" #include "zfcp_fc.h" diff --git a/drivers/sbus/char/display7seg.c b/drivers/sbus/char/display7seg.c index 740da4465447..965a1fccd66a 100644 --- a/drivers/sbus/char/display7seg.c +++ b/drivers/sbus/char/display7seg.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include /* put_/get_user */ #include diff --git a/drivers/scsi/dpt/dpti_i2o.h b/drivers/scsi/dpt/dpti_i2o.h index 179ad77f6cc9..bd9e31e16249 100644 --- a/drivers/scsi/dpt/dpti_i2o.h +++ b/drivers/scsi/dpt/dpti_i2o.h @@ -22,7 +22,7 @@ #include #include -#include +#include /* diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 6bba23a26303..c6f99b1d2383 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -46,7 +46,7 @@ #include #include #include -#include +#include #include #include "hpsa_cmd.h" #include "hpsa.h" diff --git a/drivers/scsi/pm8001/pm8001_sas.h b/drivers/scsi/pm8001/pm8001_sas.h index aa05e661d113..b97c8ab0c20e 100644 --- a/drivers/scsi/pm8001/pm8001_sas.h +++ b/drivers/scsi/pm8001/pm8001_sas.h @@ -54,7 +54,7 @@ #include #include #include -#include +#include #include "pm8001_defs.h" #define DRV_NAME "pm8001" diff --git a/drivers/staging/octeon/ethernet-rx.c b/drivers/staging/octeon/ethernet-rx.c index 0f22f0f47446..1a7c19ae766f 100644 --- a/drivers/staging/octeon/ethernet-rx.c +++ b/drivers/staging/octeon/ethernet-rx.c @@ -42,7 +42,7 @@ #include #endif /* CONFIG_XFRM */ -#include +#include #include diff --git a/drivers/staging/octeon/ethernet-tx.c b/drivers/staging/octeon/ethernet-tx.c index 6227571149f5..b445cd63f901 100644 --- a/drivers/staging/octeon/ethernet-tx.c +++ b/drivers/staging/octeon/ethernet-tx.c @@ -38,7 +38,7 @@ #include #endif /* CONFIG_XFRM */ -#include +#include #include diff --git a/drivers/staging/solo6x10/solo6x10.h b/drivers/staging/solo6x10/solo6x10.h index fd59b093dd4d..17c06bd6cc91 100644 --- a/drivers/staging/solo6x10/solo6x10.h +++ b/drivers/staging/solo6x10/solo6x10.h @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/staging/tidspbridge/include/dspbridge/host_os.h b/drivers/staging/tidspbridge/include/dspbridge/host_os.h index 1a38896f4331..a2f31c69d12e 100644 --- a/drivers/staging/tidspbridge/include/dspbridge/host_os.h +++ b/drivers/staging/tidspbridge/include/dspbridge/host_os.h @@ -18,7 +18,7 @@ #define _HOST_OS_H_ #include -#include +#include #include #include #include diff --git a/drivers/staging/winbond/mds_s.h b/drivers/staging/winbond/mds_s.h index eeedf0186365..07d835b3b706 100644 --- a/drivers/staging/winbond/mds_s.h +++ b/drivers/staging/winbond/mds_s.h @@ -3,7 +3,7 @@ #include #include -#include +#include #include "localpara.h" #include "mac_structures.h" diff --git a/drivers/staging/winbond/wb35reg_s.h b/drivers/staging/winbond/wb35reg_s.h index eb274ffdd1ba..dc79faa4029f 100644 --- a/drivers/staging/winbond/wb35reg_s.h +++ b/drivers/staging/winbond/wb35reg_s.h @@ -3,7 +3,7 @@ #include #include -#include +#include struct hw_data; diff --git a/drivers/tty/bfin_jtag_comm.c b/drivers/tty/bfin_jtag_comm.c index 03c285bb2f18..3a997760ec32 100644 --- a/drivers/tty/bfin_jtag_comm.c +++ b/drivers/tty/bfin_jtag_comm.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #define 
pr_init(fmt, args...) ({ static const __initconst char __fmt[] = fmt; printk(__fmt, ## args); }) diff --git a/drivers/tty/rocket.c b/drivers/tty/rocket.c index 13043e8d37fe..6a1241c7f841 100644 --- a/drivers/tty/rocket.c +++ b/drivers/tty/rocket.c @@ -83,7 +83,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/tty/serial/dz.c b/drivers/tty/serial/dz.c index 57421d776329..ddc487a2d42f 100644 --- a/drivers/tty/serial/dz.c +++ b/drivers/tty/serial/dz.c @@ -48,7 +48,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/tty/serial/sb1250-duart.c b/drivers/tty/serial/sb1250-duart.c index ea2340b814e9..6bc2e3f876f4 100644 --- a/drivers/tty/serial/sb1250-duart.c +++ b/drivers/tty/serial/sb1250-duart.c @@ -39,7 +39,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/tty/serial/zs.c b/drivers/tty/serial/zs.c index 1a7fd3e70315..0aebd7121b56 100644 --- a/drivers/tty/serial/zs.c +++ b/drivers/tty/serial/zs.c @@ -65,7 +65,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/usb/gadget/f_audio.c b/drivers/usb/gadget/f_audio.c index 02a02700b51d..a9a4eade7e80 100644 --- a/drivers/usb/gadget/f_audio.c +++ b/drivers/usb/gadget/f_audio.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include "u_audio.h" diff --git a/drivers/usb/gadget/f_rndis.c b/drivers/usb/gadget/f_rndis.c index 8f3eae90919f..3ea4666be3d0 100644 --- a/drivers/usb/gadget/f_rndis.c +++ b/drivers/usb/gadget/f_rndis.c @@ -29,7 +29,7 @@ #include #include -#include +#include #include "u_ether.h" #include "rndis.h" diff --git a/drivers/usb/gadget/uvc_queue.c b/drivers/usb/gadget/uvc_queue.c index f7395ac5dc17..aa0ad34e0f1f 100644 --- a/drivers/usb/gadget/uvc_queue.c +++ b/drivers/usb/gadget/uvc_queue.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include "uvc.h" diff --git a/drivers/usb/image/microtek.c b/drivers/usb/image/microtek.c index a0037961e5bd..27e209a7222f 100644 --- a/drivers/usb/image/microtek.c +++ b/drivers/usb/image/microtek.c @@ -131,7 +131,7 @@ #include #include -#include +#include #include #include "../../scsi/scsi.h" #include diff --git a/drivers/usb/misc/appledisplay.c b/drivers/usb/misc/appledisplay.c index 68ab460a735c..ac0d75a9005a 100644 --- a/drivers/usb/misc/appledisplay.c +++ b/drivers/usb/misc/appledisplay.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #define APPLE_VENDOR_ID 0x05AC diff --git a/drivers/usb/serial/garmin_gps.c b/drivers/usb/serial/garmin_gps.c index b0a7a9e909a4..1a49ca9c8ea5 100644 --- a/drivers/usb/serial/garmin_gps.c +++ b/drivers/usb/serial/garmin_gps.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/usb/wusbcore/wa-rpipe.c b/drivers/usb/wusbcore/wa-rpipe.c index ca80171f42c6..2acc7f504c51 100644 --- a/drivers/usb/wusbcore/wa-rpipe.c +++ b/drivers/usb/wusbcore/wa-rpipe.c @@ -58,7 +58,7 @@ * destination address. 
*/ #include -#include +#include #include #include diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 14c9abf0d800..a801e2821d03 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -11,7 +11,7 @@ #include #include #include -#include +#include /* This is for zerocopy, used buffer len is set to 1 when lower device DMA * done */ diff --git a/drivers/video/sh_mobile_lcdcfb.c b/drivers/video/sh_mobile_lcdcfb.c index 019dbd3f12b2..b048417247e8 100644 --- a/drivers/video/sh_mobile_lcdcfb.c +++ b/drivers/video/sh_mobile_lcdcfb.c @@ -24,7 +24,7 @@ #include #include #include